* [PATCH v5 14/21] IB/hns: Add operations support for IB device and port
From: Lijun Ou @ 2016-04-23 10:26 UTC (permalink / raw)
To: dledford, sean.hefty, hal.rosenstock, davem, jeffrey.t.kirsher,
jiri, ogerlitz, linuxarm
Cc: linux-rdma, linux-kernel, netdev, gongyangming, xiaokun,
tangchaofei, oulijun, haifeng.wei, yisen.zhuang, yankejian,
lisheng011, charles.chenxin
In-Reply-To: <1461407219-72027-1-git-send-email-oulijun@huawei.com>
This patch mainly registered some relative verbs for the kernel.
These operation funtions will be called by user. For example:
1. modify device
2. query device
3. query_port
4. modify_port
and so on.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu(Xavier) <xavier.huwei@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_device.h | 21 +++
drivers/infiniband/hw/hns/hns_roce_main.c | 229 +++++++++++++++++++++++++++-
drivers/infiniband/hw/hns/hns_roce_user.h | 17 +++
3 files changed, 266 insertions(+), 1 deletion(-)
create mode 100644 drivers/infiniband/hw/hns/hns_roce_user.h
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 6565b43..e871c60 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -22,6 +22,8 @@
#define DRV_NAME "hns_roce"
#define MAC_ADDR_OCTET_NUM 6
+#define HNS_ROCE_MAX_MSG_LEN 0x80000000
+
#define HNS_ROCE_BA_SIZE (32 * 4096)
@@ -35,7 +37,10 @@
#define HNS_ROCE_MAX_PORTS 6
#define HNS_ROCE_MAX_GID_NUM 16
+#define HNS_ROCE_GID_SIZE 16
+#define PKEY_ID 0xffff
+#define NODE_DESC_SIZE 64
#define ADDR_SHIFT_12 12
#define ADDR_SHIFT_32 32
#define ADDR_SHIFT_44 44
@@ -106,6 +111,11 @@ struct hns_roce_uar {
u32 index;
};
+struct hns_roce_ucontext {
+ struct ib_ucontext ibucontext;
+ struct hns_roce_uar uar;
+};
+
struct hns_roce_bitmap {
/* Bitmap Traversal last a bit which is 1 */
u32 last;
@@ -363,6 +373,17 @@ struct hns_roce_dev {
struct hns_roce_hw *hw;
};
+static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
+{
+ return container_of(ib_dev, struct hns_roce_dev, ib_dev);
+}
+
+static inline struct hns_roce_ucontext
+ *to_hr_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct hns_roce_ucontext, ibucontext);
+}
+
static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest)
{
__raw_writeq(*(u64 *) val, dest);
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 8aa8c55..9c111ec 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -41,6 +41,7 @@
#include <rdma/ib_verbs.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
+#include "hns_roce_user.h"
#include "hns_roce_icm.h"
/**
@@ -341,6 +342,217 @@ int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev)
return ret;
}
+static int hns_roce_query_device(struct ib_device *ib_dev,
+ struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+ memset(props, 0, sizeof(*props));
+
+ props->fw_ver = hr_dev->fw_ver;
+ props->sys_image_guid = hr_dev->sys_image_guid;
+ props->max_mr_size = (u64)(~(0ULL));
+ props->page_size_cap = hr_dev->caps.page_size_cap;
+ props->vendor_id = hr_dev->vendor_id;
+ props->vendor_part_id = hr_dev->vendor_part_id;
+ props->hw_ver = hr_dev->hw_rev;
+ props->max_qp = hr_dev->caps.num_qps;
+ props->max_qp_wr = hr_dev->caps.max_wqes;
+ props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_LOCAL_DMA_LKEY;
+ props->max_sge = hr_dev->caps.max_sq_sg;
+ props->max_sge_rd = 1;
+ props->max_cq = hr_dev->caps.num_cqs;
+ props->max_cqe = hr_dev->caps.max_cqes;
+ props->max_mr = hr_dev->caps.num_mtpts;
+ props->max_pd = hr_dev->caps.num_pds;
+ props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
+ props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->max_pkeys = 1;
+ props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+
+ return 0;
+}
+
+static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
+ struct ib_port_attr *props)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct net_device *net_dev;
+ unsigned long flags;
+ enum ib_mtu mtu;
+ u8 port;
+
+ assert(port_num > 0);
+ port = port_num - 1;
+
+ memset(props, 0, sizeof(*props));
+
+ props->max_mtu = hr_dev->caps.max_mtu;
+ props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP |
+ IB_PORT_BOOT_MGMT_SUP;
+ props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN;
+ props->pkey_tbl_len = 1;
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = 1;
+
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+ net_dev = hr_dev->iboe.netdevs[port];
+ if (!net_dev) {
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+ dev_err(dev, "find netdev %d failed!\r\n", port);
+ return -EINVAL;
+ }
+
+ mtu = iboe_get_mtu(net_dev->mtu);
+ props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
+ props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ props->phys_state = (props->state == IB_PORT_ACTIVE) ? 5 : 3;
+
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+ return 0;
+}
+
+static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index,
+ union ib_gid *gid)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct device *dev = &hr_dev->pdev->dev;
+ u8 gid_idx = 0;
+ u8 port;
+
+ if (port_num < 1 || port_num > hr_dev->caps.num_ports ||
+ index >= hr_dev->caps.gid_table_len[port_num - 1]) {
+ dev_err(dev,
+ "port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n",
+ port_num, index, hr_dev->caps.num_ports,
+ hr_dev->caps.gid_table_len[port_num - 1] - 1);
+ return -EINVAL;
+ }
+
+ port = port_num - 1;
+ gid_idx = hns_get_gid_index(hr_dev, port, index);
+ if (gid_idx >= HNS_ROCE_MAX_GID_NUM) {
+ dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n",
+ port_num, index, HNS_ROCE_MAX_GID_NUM);
+ return -EINVAL;
+ }
+
+ memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw,
+ HNS_ROCE_GID_SIZE);
+
+ return 0;
+}
+
+static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index,
+ u16 *pkey)
+{
+ *pkey = PKEY_ID;
+
+ return 0;
+}
+
+static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
+ struct ib_device_modify *props)
+{
+ unsigned long flags;
+
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ spin_lock_irqsave(&to_hr_dev(ib_dev)->sm_lock, flags);
+ memcpy(ib_dev->node_desc, props->node_desc, NODE_DESC_SIZE);
+ spin_unlock_irqrestore(&to_hr_dev(ib_dev)->sm_lock, flags);
+ }
+
+ return 0;
+}
+
+static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
+ struct ib_port_modify *props)
+{
+ return 0;
+}
+
+static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
+ struct ib_udata *udata)
+{
+ int ret = 0;
+ struct hns_roce_ucontext *context;
+ struct hns_roce_ib_alloc_ucontext_resp resp;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+ resp.qp_tab_size = hr_dev->caps.num_qps;
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ ret = hns_roce_uar_alloc(hr_dev, &context->uar);
+ if (ret)
+ goto _error_fail_uar_alloc;
+
+ ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (ret)
+ goto _error_fail_copy_to_udata;
+
+ return &context->ibucontext;
+
+_error_fail_copy_to_udata:
+hns_roce_uar_free(hr_dev, &context->uar);
+
+_error_fail_uar_alloc:
+ kfree(context);
+
+ return ERR_PTR(ret);
+}
+
+static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+
+ hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
+ kfree(context);
+
+ return 0;
+}
+
+static int hns_roce_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
+ return -EINVAL;
+
+ if (vma->vm_pgoff == 0) {
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ to_hr_ucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
@@ -370,6 +582,22 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
ib_dev->uverbs_abi_ver = 1;
+ ib_dev->uverbs_cmd_mask =
+ (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ULL << IB_USER_VERBS_CMD_QUERY_PORT);
+
+ /* HCA||device||port */
+ ib_dev->modify_device = hns_roce_modify_device;
+ ib_dev->query_device = hns_roce_query_device;
+ ib_dev->query_port = hns_roce_query_port;
+ ib_dev->modify_port = hns_roce_modify_port;
+ ib_dev->get_link_layer = hns_roce_get_link_layer;
+ ib_dev->query_gid = hns_roce_query_gid;
+ ib_dev->query_pkey = hns_roce_query_pkey;
+ ib_dev->alloc_ucontext = hns_roce_alloc_ucontext;
+ ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext;
+ ib_dev->mmap = hns_roce_mmap;
ret = ib_register_device(ib_dev, NULL);
if (ret) {
@@ -411,7 +639,6 @@ _error_failed_setup_mtu_gids:
return ret;
}
-
int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
{
int i;
diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/drivers/infiniband/hw/hns/hns_roce_user.h
new file mode 100644
index 0000000..457e77a
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_user.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _HNS_ROCE_USER_H
+#define _HNS_ROCE_USER_H
+
+struct hns_roce_ib_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+};
+
+#endif /*_HNS_ROCE_USER_H */
--
1.9.1
^ permalink raw reply related
* [PATCH v5 15/21] IB/hns: Add PD operations support
From: Lijun Ou @ 2016-04-23 10:26 UTC (permalink / raw)
To: dledford, sean.hefty, hal.rosenstock, davem, jeffrey.t.kirsher,
jiri, ogerlitz, linuxarm
Cc: linux-rdma, linux-kernel, netdev, gongyangming, xiaokun,
tangchaofei, oulijun, haifeng.wei, yisen.zhuang, yankejian,
lisheng011, charles.chenxin
In-Reply-To: <1461407219-72027-1-git-send-email-oulijun@huawei.com>
This patch added the verbs to operate PD. It mainly includes
the functions of allocating PD and deallocating PD.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu(Xavier) <xavier.huwei@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_device.h | 17 ++++++++
drivers/infiniband/hw/hns/hns_roce_main.c | 8 +++-
drivers/infiniband/hw/hns/hns_roce_pd.c | 62 +++++++++++++++++++++++++++++
3 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index e871c60..68d78e9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -116,6 +116,11 @@ struct hns_roce_ucontext {
struct hns_roce_uar uar;
};
+struct hns_roce_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+};
+
struct hns_roce_bitmap {
/* Bitmap Traversal last a bit which is 1 */
u32 last;
@@ -384,6 +389,11 @@ static inline struct hns_roce_ucontext
return container_of(ibucontext, struct hns_roce_ucontext, ibucontext);
}
+static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct hns_roce_pd, ibpd);
+}
+
static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest)
{
__raw_writeq(*(u64 *) val, dest);
@@ -431,6 +441,13 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, u32 obj,
int cnt);
+struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, u32 *pdn);
+void hns_roce_pd_free(struct hns_roce_dev *hr_dev, u32 pdn);
+int hns_roce_dealloc_pd(struct ib_pd *pd);
+
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 9c111ec..eeae944 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -585,7 +585,9 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->uverbs_cmd_mask =
(1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_PORT);
+ (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD);
/* HCA||device||port */
ib_dev->modify_device = hns_roce_modify_device;
@@ -599,6 +601,10 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext;
ib_dev->mmap = hns_roce_mmap;
+ /* PD */
+ ib_dev->alloc_pd = hns_roce_alloc_pd;
+ ib_dev->dealloc_pd = hns_roce_dealloc_pd;
+
ret = ib_register_device(ib_dev, NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index fb0f7c65..57739eb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -17,6 +17,28 @@
#include "hns_roce_common.h"
#include "hns_roce_device.h"
+int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, u32 *pdn)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 pd_number;
+ int ret = 0;
+
+ ret = hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, &pd_number);
+ if (ret == -1) {
+ dev_err(dev, "alloc pdn from pdbitmap failed\n");
+ return -ENOMEM;
+ }
+
+ *pdn = pd_number;
+
+ return 0;
+}
+
+void hns_roce_pd_free(struct hns_roce_dev *hr_dev, u32 pdn)
+{
+ hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn);
+}
+
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
{
return hns_roce_bitmap_init(&hr_dev->pd_bitmap, hr_dev->caps.num_pds,
@@ -29,6 +51,46 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev)
hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap);
}
+struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_pd *pd;
+ int ret;
+
+ pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn);
+ if (ret) {
+ kfree(pd);
+ dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n");
+ return ERR_PTR(ret);
+ }
+
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+ hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
+ dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n");
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+ return &pd->ibpd;
+}
+
+int hns_roce_dealloc_pd(struct ib_pd *pd)
+{
+ hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
+ kfree(to_hr_pd(pd));
+
+ return 0;
+}
+
int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
struct resource *res;
--
1.9.1
^ permalink raw reply related
* [PATCH v5 17/21] IB/hns: Add QP operation implemention support
From: Lijun Ou @ 2016-04-23 10:26 UTC (permalink / raw)
To: dledford, sean.hefty, hal.rosenstock, davem, jeffrey.t.kirsher,
jiri, ogerlitz, linuxarm
Cc: linux-rdma, linux-kernel, netdev, gongyangming, xiaokun,
tangchaofei, oulijun, haifeng.wei, yisen.zhuang, yankejian,
lisheng011, charles.chenxin
In-Reply-To: <1461407219-72027-1-git-send-email-oulijun@huawei.com>
This patch was implemention for queue pair operations. QP Consists
of a Send Work Queue and a Receive Work Queue. Send and receive
queues are always created as a pair and remain that way throughout
their lifetime. A Queue Pair is identified by its Queue Pair Number.
QP operations implemention as follows:
1. create QP. When a QP is created, a complete set of initial
attributes must be specified by the Consumer.
2. query QP. Returns the attribute list and current values for
the specified QP.
3. modify QP. modify QP relative attributes by it.
4. destroy QP. When a QP is destroyed, any outstanding Work Requests
are no longer considered to be in the scope of the Channel Interface.
It is the responsibility of the Consumer to be able to clean up
any resources
5. post send request. Builds one or more WQEs for the Send Queue in
the specified QP.
6. post receive request. Builds one or more WQEs for the receive Queue in
the specified QP.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu(Xavier) <xavier.huwei@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_alloc.c | 134 +++
drivers/infiniband/hw/hns/hns_roce_cmd.c | 236 +++-
drivers/infiniband/hw/hns/hns_roce_cmd.h | 54 +-
drivers/infiniband/hw/hns/hns_roce_common.h | 58 +
drivers/infiniband/hw/hns/hns_roce_device.h | 168 +++
drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1642 +++++++++++++++++++++++++++
drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 626 ++++++++++
drivers/infiniband/hw/hns/hns_roce_icm.c | 55 +
drivers/infiniband/hw/hns/hns_roce_icm.h | 11 +-
drivers/infiniband/hw/hns/hns_roce_main.c | 14 +-
drivers/infiniband/hw/hns/hns_roce_mr.c | 160 +++
drivers/infiniband/hw/hns/hns_roce_qp.c | 765 +++++++++++++
drivers/infiniband/hw/hns/hns_roce_user.h | 6 +
13 files changed, 3912 insertions(+), 17 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 0c76f1b..5bcffe6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -47,6 +47,45 @@ void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, u32 obj)
hns_roce_bitmap_free_range(bitmap, obj, 1);
}
+int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
+ int align, u32 *obj)
+{
+ int ret = 0;
+ int i;
+
+ if (likely(cnt == 1 && align == 1))
+ return hns_roce_bitmap_alloc(bitmap, obj);
+
+ spin_lock(&bitmap->lock);
+
+ *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
+ bitmap->last, cnt, align - 1);
+ if (*obj >= bitmap->max) {
+ bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+ & bitmap->mask;
+ *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, 0,
+ cnt, align - 1);
+ }
+
+ if (*obj < bitmap->max) {
+ for (i = 0; i < cnt; i++)
+ set_bit(*obj + i, bitmap->table);
+
+ if (*obj == bitmap->last) {
+ bitmap->last = (*obj + cnt);
+ if (bitmap->last >= bitmap->max)
+ bitmap->last = 0;
+ }
+ *obj |= bitmap->top;
+ } else {
+ ret = -1;
+ }
+
+ spin_unlock(&bitmap->lock);
+
+ return ret;
+}
+
void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, u32 obj,
int cnt)
{
@@ -94,6 +133,101 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
kfree(bitmap->table);
}
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
+ struct hns_roce_buf *buf)
+{
+ int i;
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 bits_per_long = BITS_PER_LONG;
+
+ if (buf->nbufs == 1) {
+ dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
+ } else {
+ if (bits_per_long == 64)
+ vunmap(buf->direct.buf);
+
+ for (i = 0; i < buf->nbufs; ++i)
+ if (buf->page_list[i].buf)
+ dma_free_coherent(&hr_dev->pdev->dev, PAGE_SIZE,
+ buf->page_list[i].buf,
+ buf->page_list[i].map);
+ kfree(buf->page_list);
+ }
+}
+
+int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
+ struct hns_roce_buf *buf)
+{
+ int i = 0;
+ dma_addr_t t;
+ struct page **pages;
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 bits_per_long = BITS_PER_LONG;
+
+ /* SQ/RQ buf lease than one page, SQ + RQ = 8K */
+ if (size <= max_direct) {
+ buf->nbufs = 1;
+ /* Npages calculated by page_size */
+ buf->npages = 1 << get_order(size);
+ buf->page_shift = PAGE_SHIFT;
+ /* MTT PA must be recorded in 4k alignment, t is 4k aligned */
+ buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL);
+ if (!buf->direct.buf)
+ return -ENOMEM;
+
+ buf->direct.map = t;
+
+ while (t & ((1 << buf->page_shift) - 1)) {
+ --buf->page_shift;
+ buf->npages *= 2;
+ }
+
+ memset(buf->direct.buf, 0, size);
+ } else {
+ buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ buf->npages = buf->nbufs;
+ buf->page_shift = PAGE_SHIFT;
+ buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
+ GFP_KERNEL);
+
+ if (!buf->page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < buf->nbufs; ++i) {
+ buf->page_list[i].buf = dma_alloc_coherent(dev,
+ PAGE_SIZE, &t,
+ GFP_KERNEL);
+
+ if (!buf->page_list[i].buf)
+ goto err_free;
+
+ buf->page_list[i].map = t;
+ memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+ }
+ if (bits_per_long == 64) {
+ pages = kmalloc_array(buf->nbufs, sizeof(*pages),
+ GFP_KERNEL);
+ if (!pages)
+ goto err_free;
+
+ for (i = 0; i < buf->nbufs; ++i)
+ pages[i] = virt_to_page(buf->page_list[i].buf);
+
+ buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP,
+ PAGE_KERNEL);
+ kfree(pages);
+ if (!buf->direct.buf)
+ goto err_free;
+ }
+ }
+
+ return 0;
+
+err_free:
+ hns_roce_buf_free(hr_dev, size, buf);
+ return -ENOMEM;
+}
+
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
hns_roce_cleanup_qp_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index b55c200..bc78054 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -7,16 +7,32 @@
* (at your option) any later version.
*/
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
+#define CMD_POLL_TOKEN 0xffff
#define CMD_MAX_NUM 32
+#define STATUS_MASK 0xff
+
+enum {
+ HCR_TOKEN_OFFSET = 0x14,
+ HCR_STATUS_OFFSET = 0x18,
+ HCR_GO_BIT = 15,
+};
+
+enum {
+ GO_BIT_TIMEOUT_MSECS = 10000,
+};
static int hns_roce_status_to_errno(u8 orig_status)
{
@@ -26,6 +42,185 @@ static int hns_roce_status_to_errno(u8 orig_status)
return -EIO;
}
+static int cmd_pending(struct hns_roce_dev *hr_dev)
+{
+ u32 status = roce_readl(hr_dev->cmd.hcr + HCR_TOKEN_OFFSET);
+
+ return (!!(status & (1 << HCR_GO_BIT)));
+}
+
+static int hns_roce_cmd_post(struct hns_roce_dev *hr_dev, u64 in_param,
+ u64 out_param, u32 in_modifier, u8 op_modifier,
+ u16 op, u16 token, int event)
+{
+ struct hns_roce_cmdq *cmd = &hr_dev->cmd;
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 __iomem *hcr = (u32 *)cmd->hcr;
+ int ret = -EAGAIN;
+ unsigned long end;
+ u32 val = 0;
+
+ mutex_lock(&cmd->hcr_mutex);
+
+ end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies;
+ while (cmd_pending(hr_dev)) {
+ if (time_after(jiffies, end)) {
+ dev_dbg(dev, "jiffies=%d end=%d\n", (int)jiffies,
+ (int)end);
+ goto out;
+ }
+ cond_resched();
+ }
+
+ roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
+ op);
+ roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
+ ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier);
+ roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
+ roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
+ roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M,
+ ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
+
+ __raw_writeq(cpu_to_le64(in_param), hcr + 0);
+ __raw_writeq(cpu_to_le64(out_param), hcr + 2);
+ __raw_writel(cpu_to_le32(in_modifier), hcr + 4);
+ /* Memory barrier */
+ wmb();
+
+ __raw_writel(cpu_to_le32(val), hcr + 5);
+
+ mmiowb();
+ ret = 0;
+
+out:
+ mutex_unlock(&cmd->hcr_mutex);
+ return ret;
+}
+
+
+static int hns_roce_cmd_poll(struct hns_roce_dev *hr_dev, u64 in_param,
+ u64 *out_param, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ u8 __iomem *hcr = hr_dev->cmd.hcr;
+ unsigned long end = 0;
+ u32 status = 0;
+ int ret;
+
+ down(&hr_dev->cmd.poll_sem);
+
+ ret = hns_roce_cmd_post(hr_dev, in_param, out_param ? *out_param : 0,
+ in_modifier, op_modifier, op, CMD_POLL_TOKEN,
+ 0);
+ if (ret) {
+ dev_err(dev, "[cmd_poll]hns_roce_cmd_post failed\n");
+ goto out;
+ }
+
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (cmd_pending(hr_dev) && time_before(jiffies, end))
+ cond_resched();
+
+ if (cmd_pending(hr_dev)) {
+ dev_err(dev, "[cmd_poll]hw run cmd TIMEDOUT!\n");
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+
+ status = le32_to_cpu((__force __be32)
+ __raw_readl(hcr + HCR_STATUS_OFFSET));
+ if ((status & STATUS_MASK) != 0x1) {
+ dev_err(dev, "mailbox status 0x%x!\n", status);
+ ret = -EBUSY;
+ goto out;
+ }
+
+out:
+ up(&hr_dev->cmd.poll_sem);
+ return ret;
+}
+
+void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
+ u64 out_param)
+{
+ struct hns_roce_cmd_context
+ *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask];
+
+ if (token != context->token)
+ return;
+
+ context->result = hns_roce_status_to_errno(status);
+ context->out_param = out_param;
+ complete(&context->done);
+}
+
+static int hns_roce_cmd_wait(struct hns_roce_dev *hr_dev, u64 in_param,
+ u64 *out_param, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
+{
+ struct hns_roce_cmdq *cmd = &hr_dev->cmd;
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_cmd_context *context;
+ int ret = 0;
+
+ down(&cmd->event_sem);
+
+ spin_lock(&cmd->context_lock);
+ WARN_ON(cmd->free_head < 0);
+ context = &cmd->context[cmd->free_head];
+ context->token += cmd->token_mask + 1;
+ cmd->free_head = context->next;
+ spin_unlock(&cmd->context_lock);
+
+ init_completion(&context->done);
+
+ ret = hns_roce_cmd_post(hr_dev, in_param, out_param ? *out_param : 0,
+ in_modifier, op_modifier, op, context->token,
+ 1);
+ if (ret)
+ goto out;
+
+ /*
+ * It is timeout when wait_for_completion_timeout return 0
+ * The return value is the time limit set in advance
+ * how many seconds showing
+ */
+ if (!wait_for_completion_timeout(&context->done,
+ msecs_to_jiffies(timeout))) {
+ dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ret = context->result;
+ if (ret) {
+ dev_err(dev, "[cmd]event mod cmd process error!err=%d\n", ret);
+ goto out;
+ }
+
+out:
+ spin_lock(&cmd->context_lock);
+ context->next = cmd->free_head;
+ cmd->free_head = context - cmd->context;
+ spin_unlock(&cmd->context_lock);
+
+ up(&cmd->event_sem);
+ return ret;
+}
+
+int __hns_roce_cmd(struct hns_roce_dev *hr_dev, u64 in_param, u64 *out_param,
+ u32 in_modifier, u8 op_modifier, u16 op,
+ unsigned long timeout)
+{
+ if (hr_dev->cmd.use_events)
+ return hns_roce_cmd_wait(hr_dev, in_param, out_param,
+ in_modifier, op_modifier, op, timeout);
+ else
+ return hns_roce_cmd_poll(hr_dev, in_param, out_param,
+ in_modifier, op_modifier, op, timeout);
+}
+
int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
{
struct device *dev = &hr_dev->pdev->dev;
@@ -99,16 +294,31 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
up(&hr_cmd->poll_sem);
}
-void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
- u64 out_param)
+struct hns_roce_cmd_mailbox
+ *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_cmd_context
- *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask];
+ struct hns_roce_cmd_mailbox *mailbox;
- if (token != context->token)
+ mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL,
+ &mailbox->dma);
+ if (!mailbox->buf) {
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mailbox;
+}
+
+void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox)
+{
+ if (!mailbox)
return;
- context->result = hns_roce_status_to_errno(status);
- context->out_param = out_param;
- complete(&context->done);
+ dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 4e102a4..9583546 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -13,7 +13,59 @@
#include <linux/dma-mapping.h>
enum {
- HNS_ROCE_MAILBOX_SIZE = 4096
+ /* QP/EE commands */
+ HNS_ROCE_CMD_RST2INIT_QP = 0x19,
+ HNS_ROCE_CMD_INIT2RTR_QP = 0x1a,
+ HNS_ROCE_CMD_RTR2RTS_QP = 0x1b,
+ HNS_ROCE_CMD_RTS2RTS_QP = 0x1c,
+ HNS_ROCE_CMD_2ERR_QP = 0x1e,
+ HNS_ROCE_CMD_RTS2SQD_QP = 0x1f,
+ HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
+ HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
+ HNS_ROCE_CMD_2RST_QP = 0x21,
+ HNS_ROCE_CMD_QUERY_QP = 0x22,
};
+enum {
+ HNS_ROCE_CMD_TIME_CLASS_A = 10000,
+ HNS_ROCE_CMD_TIME_CLASS_C = 10000,
+};
+
+enum {
+ HNS_ROCE_MAILBOX_SIZE = 4096,
+};
+
+struct hns_roce_cmd_mailbox {
+ void *buf;
+ dma_addr_t dma;
+};
+
+int __hns_roce_cmd(struct hns_roce_dev *hr_dev, u64 in_param, u64 *out_param,
+ u32 in_modifier, u8 op_modifier, u16 op,
+ unsigned long timeout);
+
+/* Invoke a command with no output parameter */
+static inline int hns_roce_cmd(struct hns_roce_dev *hr_dev, u64 in_param,
+ u32 in_modifier, u8 op_modifier, u16 op,
+ unsigned long timeout)
+{
+ return __hns_roce_cmd(hr_dev, in_param, NULL, in_modifier,
+ op_modifier, op, timeout);
+}
+
+/* Invoke a command with an output mailbox */
+static inline int hns_roce_cmd_box(struct hns_roce_dev *hr_dev, u64 in_param,
+ u64 out_param, u32 in_modifier,
+ u8 op_modifier, u16 op,
+ unsigned long timeout)
+{
+ return __hns_roce_cmd(hr_dev, in_param, &out_param, in_modifier,
+ op_modifier, op, timeout);
+}
+
+struct hns_roce_cmd_mailbox
+ *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
+void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox);
+
#endif /* _HNS_ROCE_CMD_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index db32a52..202e21e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -137,6 +137,44 @@
#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S 31
+#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S 0
+#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M \
+ (((1UL << 3) - 1) << ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S)
+
+#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S 0
+#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M \
+ (((1UL << 15) - 1) << ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S)
+
+#define ROCEE_MB6_ROCEE_MB_CMD_S 0
+#define ROCEE_MB6_ROCEE_MB_CMD_M \
+ (((1UL << 8) - 1) << ROCEE_MB6_ROCEE_MB_CMD_S)
+
+#define ROCEE_MB6_ROCEE_MB_CMD_MDF_S 8
+#define ROCEE_MB6_ROCEE_MB_CMD_MDF_M \
+ (((1UL << 4) - 1) << ROCEE_MB6_ROCEE_MB_CMD_MDF_S)
+
+#define ROCEE_MB6_ROCEE_MB_EVENT_S 14
+
+#define ROCEE_MB6_ROCEE_MB_HW_RUN_S 15
+
+#define ROCEE_MB6_ROCEE_MB_TOKEN_S 16
+#define ROCEE_MB6_ROCEE_MB_TOKEN_M \
+ (((1UL << 16) - 1) << ROCEE_MB6_ROCEE_MB_TOKEN_S)
+
+#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S 0
+#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M \
+ (((1UL << 24) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S)
+
+#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S 24
+#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M \
+ (((1UL << 4) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S)
+
+#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S 28
+#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M \
+ (((1UL << 3) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S)
+
+#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S 31
+
#define ROCEE_SMAC_H_ROCEE_SMAC_H_S 0
#define ROCEE_SMAC_H_ROCEE_SMAC_H_M \
(((1UL << 16) - 1) << ROCEE_SMAC_H_ROCEE_SMAC_H_S)
@@ -176,6 +214,18 @@
#define ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S 0
+#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S 0
+#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M \
+ (((1UL << 28) - 1) << ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
+
+#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S 0
+#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \
+ (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
+
+#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0
+#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \
+ (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S)
+
/*************ROCEE_REG DEFINITION****************/
#define ROCEE_VENDOR_ID_REG 0x0
#define ROCEE_VENDOR_PART_ID_REG 0x4
@@ -195,6 +245,8 @@
#define ROCEE_SMAC_L_0_REG 0x240
#define ROCEE_SMAC_H_0_REG 0x244
+#define ROCEE_QP1C_CFG3_0_REG 0x27C
+
#define ROCEE_CAEP_AEQE_CONS_IDX_REG 0x3AC
#define ROCEE_CAEP_CEQC_CONS_IDX_0_REG 0x3BC
@@ -230,6 +282,9 @@
#define ROCEE_BT_CMD_L_REG 0x200
#define ROCEE_MB1_REG 0x210
+#define ROCEE_DB_SQ_L_0_REG 0x230
+#define ROCEE_DB_OTHERS_L_0_REG 0x238
+#define ROCEE_QP1C_CFG0_0_REG 0x270
#define ROCEE_CAEP_AEQC_AEQE_SHIFT_REG 0x3A0
#define ROCEE_CAEP_CEQC_SHIFT_0_REG 0x3B0
@@ -238,6 +293,9 @@
#define ROCEE_CAEP_AE_MASK_REG 0x6C8
#define ROCEE_CAEP_AE_ST_REG 0x6CC
+#define ROCEE_SDB_ISSUE_PTR_REG 0x758
+#define ROCEE_SDB_SEND_PTR_REG 0x75C
+#define ROCEE_SDB_INV_CNT_REG 0x9A4
#define ROCEE_ECC_UCERR_ALM0_REG 0xB34
#define ROCEE_ECC_CERR_ALM0_REG 0xB40
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index f74786b..e204a7f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -24,8 +24,15 @@
#define MAC_ADDR_OCTET_NUM 6
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
+#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
+
+#define HNS_ROCE_IB_MIN_SQ_STRIDE 6
+
#define HNS_ROCE_BA_SIZE (32 * 4096)
+/* Hardware specification only for v1 engine */
+#define HNS_ROCE_MIN_WQE_NUM 0x20
+
#define HNS_ROCE_MAX_IRQ_NUM 34
#define HNS_ROCE_MAX_PORTS 6
@@ -51,6 +58,16 @@
#define PAGES_SHIFT_16 16
+enum hns_roce_qp_state {
+ HNS_ROCE_QP_STATE_RST = 0,
+ HNS_ROCE_QP_STATE_INIT = 1,
+ HNS_ROCE_QP_STATE_RTR = 2,
+ HNS_ROCE_QP_STATE_RTS = 3,
+ HNS_ROCE_QP_STATE_SQD = 4,
+ HNS_ROCE_QP_STATE_ERR = 5,
+ HNS_ROCE_QP_NUM_STATE
+};
+
enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01,
HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02,
@@ -110,6 +127,10 @@ enum {
#define HNS_ROCE_PORT_DOWN 0
#define HNS_ROCE_PORT_UP 1
+#define HNS_ROCE_MTT_ENTRY_PER_SEG 8
+
+#define PAGE_ADDR_SHIFT 12
+
struct hns_roce_uar {
u64 pfn;
u32 index;
@@ -167,6 +188,12 @@ struct hns_roce_icm_table {
struct hns_roce_icm **icm;
};
+struct hns_roce_mtt {
+ u32 first_seg;
+ int order;
+ int page_shift;
+};
+
struct hns_roce_mr_table {
struct hns_roce_bitmap mtpt_bitmap;
struct hns_roce_buddy mtt_buddy;
@@ -174,19 +201,57 @@ struct hns_roce_mr_table {
struct hns_roce_icm_table mtpt_table;
};
+struct hns_roce_wq {
+ u64 *wrid; /* Work request ID */
+ spinlock_t lock;
+ int wqe_cnt; /* WQE num */
+ u32 max_post;
+ int max_gs;
+ int offset;
+ int wqe_shift;/* WQE size */
+ u32 head;
+ u32 tail;
+ void __iomem *db_reg_l;
+};
+
struct hns_roce_buf_list {
void *buf;
dma_addr_t map;
};
+struct hns_roce_buf {
+ struct hns_roce_buf_list direct;
+ struct hns_roce_buf_list *page_list;
+ int nbufs;
+ u32 npages;
+ int page_shift;
+};
+
+struct hns_roce_cq_buf {
+ struct hns_roce_buf hr_buf;
+};
+
struct hns_roce_cq {
+ struct ib_cq ib_cq;
+ struct hns_roce_cq_buf hr_buf;
+ /* pointer to store information after resize*/
+ spinlock_t lock;
void (*comp)(struct hns_roce_cq *);
void (*event)(struct hns_roce_cq *, enum hns_roce_event);
+ u32 cq_depth;
+ u32 cons_index;
+ void __iomem *cq_db_l;
+ u32 cqn;
atomic_t refcount;
struct completion free;
};
+struct hns_roce_srq {
+ struct ib_srq ibsrq;
+ int srqn;
+};
+
struct hns_roce_uar_table {
struct hns_roce_bitmap bitmap;
};
@@ -268,13 +333,38 @@ struct hns_roce_cmdq {
struct hns_roce_dev;
struct hns_roce_qp {
+ struct ib_qp ibqp;
+ struct hns_roce_buf hr_buf;
+ struct hns_roce_wq rq;
+ __le32 doorbell_qpn;
+ __le32 sq_signal_bits;
+ u32 sq_next_wqe;
+ int sq_max_wqes_per_wr;
+ int sq_spare_wqes;
+ struct hns_roce_wq sq;
+
+ struct ib_umem *umem;
+ struct hns_roce_mtt mtt;
+ u32 buff_size;
+ struct mutex mutex;
+ u8 port;
+ u8 sl;
+ u8 resp_depth;
+ u8 state;
+ u32 access_flags;
+ u32 pkey_index;
void (*event)(struct hns_roce_qp *,
enum hns_roce_event);
+ int qpn;
atomic_t refcount;
struct completion free;
};
+struct hns_roce_sqp {
+ struct hns_roce_qp hr_qp;
+};
+
struct hns_roce_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[HNS_ROCE_MAX_PORTS];
@@ -361,6 +451,17 @@ struct hns_roce_hw {
void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
enum ib_mtu mtu);
+ int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+ int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state);
+ int (*destroy_qp)(struct ib_qp *ibqp);
+ int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+ int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+
void *priv;
};
@@ -419,6 +520,26 @@ static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
return container_of(ibah, struct hns_roce_ah, ibah);
}
+static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct hns_roce_qp, ibqp);
+}
+
+static inline struct hns_roce_cq *to_hr_cq(struct ib_cq *ib_cq)
+{
+ return container_of(ib_cq, struct hns_roce_cq, ib_cq);
+}
+
+static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct hns_roce_srq, ibsrq);
+}
+
+static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
+{
+ return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
+}
+
static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest)
{
__raw_writeq(*(u64 *) val, dest);
@@ -431,6 +552,17 @@ static inline struct hns_roce_qp
qpn & (hr_dev->caps.num_qps - 1));
}
+static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
+{
+ u32 bits_per_long_val = BITS_PER_LONG;
+
+ if (bits_per_long_val == 64 || buf->nbufs == 1)
+ return (char *)(buf->direct.buf) + offset;
+ else
+ return (char *)(buf->page_list[offset >> PAGE_SHIFT].buf) +
+ (offset & (PAGE_SIZE - 1));
+}
+
int hns_roce_init_uar_table(struct hns_roce_dev *dev);
int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
@@ -443,6 +575,13 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev);
void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
+int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
+ struct hns_roce_mtt *mtt);
+void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtt *mtt);
+int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtt *mtt, struct hns_roce_buf *buf);
+
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
@@ -477,6 +616,35 @@ int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, u32 *pdn);
void hns_roce_pd_free(struct hns_roce_dev *hr_dev, u32 pdn);
int hns_roce_dealloc_pd(struct ib_pd *pd);
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
+ struct hns_roce_buf *buf);
+int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
+ struct hns_roce_buf *buf);
+
+int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtt *mtt, struct ib_umem *umem);
+
+struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
+void *get_send_wqe(struct hns_roce_qp *hr_qp, int n);
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+ struct ib_cq *ib_cq);
+enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
+ struct hns_roce_cq *recv_cq);
+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
+ struct hns_roce_cq *recv_cq);
+void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
+void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
+void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
+ int cnt);
+__be32 send_ieth(struct ib_send_wr *wr);
+int to_hr_qp_type(int qp_type);
+
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b9d396b..3bbf570 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -19,8 +19,374 @@
#include <linux/platform_device.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_icm.h"
#include "hns_roce_hw_v1.h"
+static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->lkey = cpu_to_le32(sg->lkey);
+ dseg->addr = cpu_to_le64(sg->addr);
+ dseg->len = cpu_to_le32(sg->length);
+}
+
+static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr,
+ u32 rkey)
+{
+ rseg->raddr = cpu_to_le64(remote_addr);
+ rseg->rkey = cpu_to_le32(rkey);
+ rseg->len = 0;
+}
+
+int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
+ struct hns_roce_ud_send_wqe *ud_sq_wqe = NULL;
+ struct hns_roce_wqe_ctrl_seg *ctrl = NULL;
+ struct hns_roce_wqe_data_seg *dseg = NULL;
+ struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_sq_db sq_db;
+ int ps_opcode = 0, i = 0;
+ unsigned long flags = 0;
+ void *wqe = NULL;
+ u32 doorbell[2];
+ int nreq = 0;
+ u32 ind = 0;
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ ind = qp->sq_next_wqe;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ dev_err(dev, "hns_roce_wq_overflow error\n");
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+ dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
+ wr->num_sge, qp->sq.max_gs);
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
+ qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
+ wr->wr_id;
+
+ /* Corresponding to the RC and RD type wqe process separately */
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ ud_sq_wqe = wqe;
+ roce_set_field(ud_sq_wqe->dmac_h,
+ UD_SEND_WQE_U32_4_DMAC_0_M,
+ UD_SEND_WQE_U32_4_DMAC_0_S,
+ ah->av.mac[0]);
+ roce_set_field(ud_sq_wqe->dmac_h,
+ UD_SEND_WQE_U32_4_DMAC_1_M,
+ UD_SEND_WQE_U32_4_DMAC_1_S,
+ ah->av.mac[1]);
+ roce_set_field(ud_sq_wqe->dmac_h,
+ UD_SEND_WQE_U32_4_DMAC_2_M,
+ UD_SEND_WQE_U32_4_DMAC_2_S,
+ ah->av.mac[2]);
+ roce_set_field(ud_sq_wqe->dmac_h,
+ UD_SEND_WQE_U32_4_DMAC_3_M,
+ UD_SEND_WQE_U32_4_DMAC_3_S,
+ ah->av.mac[3]);
+
+ roce_set_field(ud_sq_wqe->u32_8,
+ UD_SEND_WQE_U32_8_DMAC_4_M,
+ UD_SEND_WQE_U32_8_DMAC_4_S,
+ ah->av.mac[4]);
+ roce_set_field(ud_sq_wqe->u32_8,
+ UD_SEND_WQE_U32_8_DMAC_5_M,
+ UD_SEND_WQE_U32_8_DMAC_5_S,
+ ah->av.mac[5]);
+ roce_set_field(ud_sq_wqe->u32_8,
+ UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
+ UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
+ HNS_ROCE_WQE_OPCODE_SEND);
+ roce_set_field(ud_sq_wqe->u32_8,
+ UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M,
+ UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S,
+ 2);
+ roce_set_bit(ud_sq_wqe->u32_8,
+ UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S,
+ 1);
+
+ ud_sq_wqe->u32_8 |= (wr->send_flags & IB_SEND_SIGNALED ?
+ cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
+ (wr->send_flags & IB_SEND_SOLICITED ?
+ cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
+ ((wr->opcode == IB_WR_SEND_WITH_IMM) ?
+ cpu_to_le32(HNS_ROCE_WQE_IMM) : 0);
+
+ roce_set_field(ud_sq_wqe->u32_16,
+ UD_SEND_WQE_U32_16_DEST_QP_M,
+ UD_SEND_WQE_U32_16_DEST_QP_S,
+ ud_wr(wr)->remote_qpn);
+ roce_set_field(ud_sq_wqe->u32_16,
+ UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M,
+ UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S,
+ ah->av.stat_rate);
+
+ roce_set_field(ud_sq_wqe->u32_36,
+ UD_SEND_WQE_U32_36_FLOW_LABEL_M,
+ UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0);
+ roce_set_field(ud_sq_wqe->u32_36,
+ UD_SEND_WQE_U32_36_PRIORITY_M,
+ UD_SEND_WQE_U32_36_PRIORITY_S,
+ ah->av.sl_tclass_flowlabel >>
+ HNS_ROCE_SL_SHIFT);
+ roce_set_field(ud_sq_wqe->u32_36,
+ UD_SEND_WQE_U32_36_SGID_INDEX_M,
+ UD_SEND_WQE_U32_36_SGID_INDEX_S,
+ hns_get_gid_index(hr_dev, qp->port,
+ ah->av.gid_index));
+
+ roce_set_field(ud_sq_wqe->u32_40,
+ UD_SEND_WQE_U32_40_HOP_LIMIT_M,
+ UD_SEND_WQE_U32_40_HOP_LIMIT_S,
+ ah->av.hop_limit);
+ roce_set_field(ud_sq_wqe->u32_40,
+ UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
+ UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0);
+
+ memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
+
+ ud_sq_wqe->va0_l = (u32)wr->sg_list[0].addr;
+ ud_sq_wqe->va0_h = (wr->sg_list[0].addr) >>
+ ADDR_SHIFT_32;
+ ud_sq_wqe->l_key0 = wr->sg_list[0].lkey;
+
+ ud_sq_wqe->va1_l = (u32)wr->sg_list[1].addr;
+ ud_sq_wqe->va1_h = (wr->sg_list[1].addr) >>
+ ADDR_SHIFT_32;
+ ud_sq_wqe->l_key1 = wr->sg_list[1].lkey;
+ ind++;
+ } else if (ibqp->qp_type == IB_QPT_RC) {
+ ctrl = wqe;
+ memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg));
+ for (i = 0; i < wr->num_sge; i++)
+ ctrl->msg_length += wr->sg_list[i].length;
+
+ ctrl->sgl_pa_h = 0;
+ ctrl->flag = 0;
+ ctrl->imm_data = send_ieth(wr);
+
+ /*Ctrl field, ctrl set type: sig, solic, imm, fence */
+ /* SO wait for conforming application scenarios */
+ ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ?
+ cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
+ (wr->send_flags & IB_SEND_SOLICITED ?
+ cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
+ ((wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ?
+ cpu_to_le32(HNS_ROCE_WQE_IMM) : 0) |
+ (wr->send_flags & IB_SEND_FENCE ?
+ (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0);
+
+ wqe = (struct hns_roce_wqe_ctrl_seg *)wqe +
+ sizeof(struct hns_roce_wqe_ctrl_seg);
+
+ switch (wr->opcode) {
+ case IB_WR_RDMA_READ:
+ ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ;
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE;
+ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+ atomic_wr(wr)->rkey);
+ break;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ case IB_WR_SEND_WITH_IMM:
+ ps_opcode = HNS_ROCE_WQE_OPCODE_SEND;
+ break;
+ case IB_WR_LOCAL_INV:
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ case IB_WR_LSO:
+ default:
+ ps_opcode = HNS_ROCE_WQE_OPCODE_MASK;
+ break;
+ }
+ ctrl->flag |= cpu_to_le32(ps_opcode);
+ wqe = (struct hns_roce_wqe_raddr_seg *)wqe +
+ sizeof(struct hns_roce_wqe_raddr_seg);
+
+ dseg = wqe;
+ if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
+ if (ctrl->msg_length >
+ hr_dev->caps.max_sq_inline) {
+ ret = -EINVAL;
+ *bad_wr = wr;
+ dev_err(dev, "inline len(1-%d)=%d, illegal",
+ ctrl->msg_length,
+ hr_dev->caps.max_sq_inline);
+ goto out;
+ }
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(wqe, ((void *) (uintptr_t)
+ wr->sg_list[i].addr),
+ wr->sg_list[i].length);
+ wqe = (struct hns_roce_wqe_raddr_seg *)
+ wqe + wr->sg_list[i].length;
+ }
+ ctrl->flag |= HNS_ROCE_WQE_INLINE;
+ } else {
+ /*sqe num is two */
+ for (i = 0; i < wr->num_sge; i++)
+ set_data_seg(dseg + i, wr->sg_list + i);
+
+ ctrl->flag |= cpu_to_le32(wr->num_sge <<
+ HNS_ROCE_WQE_SGE_NUM_BIT);
+ }
+ ind++;
+ } else {
+ dev_dbg(dev, "unSupported QP type\n");
+ break;
+ }
+ }
+
+out:
+ /* Set DB return */
+ if (likely(nreq)) {
+ qp->sq.head += nreq;
+ /* Memory barrier */
+ wmb();
+
+ sq_db.u32_4 = 0;
+ sq_db.u32_8 = 0;
+ roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
+ SQ_DOORBELL_U32_4_SQ_HEAD_S,
+ (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
+ roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
+ SQ_DOORBELL_U32_4_PORT_S, qp->port);
+ roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
+ SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn);
+ roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1);
+
+ doorbell[0] = sq_db.u32_4;
+ doorbell[1] = sq_db.u32_8;
+
+ hns_roce_write64_k(doorbell, qp->sq.db_reg_l);
+ qp->sq_next_wqe = ind;
+ }
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return ret;
+}
+
+int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int ret = 0;
+ int nreq = 0;
+ int ind = 0;
+ int i = 0;
+ u32 reg_val = 0;
+ u32 *addr = NULL;
+ unsigned long flags = 0;
+ struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
+ struct hns_roce_wqe_data_seg *scat = NULL;
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct device *dev = &hr_dev->pdev->dev;
+
+ spin_lock_irqsave(&hr_qp->rq.lock, flags);
+ ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
+ hr_qp->ibqp.recv_cq)) {
+ dev_err(dev, "hns_roce_wq_overflow error\n");
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
+ dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
+ wr->num_sge, hr_qp->rq.max_gs);
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ ctrl = get_recv_wqe(hr_qp, ind);
+
+ roce_set_field(ctrl->rwqe_byte_12,
+ RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
+ RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S,
+ wr->num_sge);
+
+ scat = (struct hns_roce_wqe_data_seg *)(ctrl + 1);
+
+ for (i = 0; i < wr->num_sge; i++)
+ set_data_seg(scat + i, wr->sg_list + i);
+
+ hr_qp->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+ }
+
+out:
+ if (likely(nreq)) {
+ hr_qp->rq.head += nreq;
+ /* Memory barrier */
+ wmb();
+
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ /* SW update GSI rq header */
+ addr = (u32 *)(to_hr_dev(ibqp->device)->reg_base +
+ ROCEE_QP1C_CFG3_0_REG +
+ QP1C_CFGN_OFFSET * hr_qp->port);
+ reg_val = roce_readl(addr);
+ roce_set_field(reg_val,
+ ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
+ ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
+ hr_qp->rq.head);
+ roce_writel(reg_val, addr);
+ } else {
+ uint32_t doorbell[2] = {0};
+ struct hns_roce_rq_db rq_db;
+
+ rq_db.u32_4 = 0;
+ rq_db.u32_8 = 0;
+
+ roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M,
+ RQ_DOORBELL_U32_4_RQ_HEAD_S,
+ hr_qp->rq.head);
+ roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M,
+ RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
+ roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M,
+ RQ_DOORBELL_U32_8_CMD_S, 1);
+ roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
+ 1);
+
+ doorbell[0] = rq_db.u32_4;
+ doorbell[1] = rq_db.u32_8;
+
+ hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
+ }
+ }
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+
+ return ret;
+}
+
void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, int sdb_mode,
int odb_mode)
{
@@ -662,6 +1028,1277 @@ void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
phy_port * PHY_PORT_OFFSET);
}
+static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
+{
+ return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
+ n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
+}
+
+static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
+{
+ struct hns_roce_cqe *hr_cqe = get_cqe(hr_cq, n & hr_cq->ib_cq.cqe);
+
+ /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
+ return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^
+ !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL;
+}
+
+void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index,
+ spinlock_t *doorbell_lock)
+
+{
+ u32 doorbell[2];
+
+ doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1);
+ roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
+ roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
+ ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
+ roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
+ ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 0);
+ roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
+ ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, hr_cq->cqn);
+
+ hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
+}
+
+static void hns_roce_v1_clean_cq(struct hns_roce_cq *hr_cq, u32 qpn,
+ struct hns_roce_srq *srq)
+{
+ struct hns_roce_cqe *cqe, *dest;
+ u32 prod_index;
+ int nfreed = 0;
+ u8 owner_bit;
+
+ for (prod_index = hr_cq->cons_index; get_sw_cqe(hr_cq, prod_index);
+ ++prod_index) {
+ if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe)
+ break;
+ }
+
+ /*
+ * Now backwards through the CQ, removing CQ entries
+ * that match our QP by overwriting them with next entries.
+ */
+ while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
+ cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe);
+ if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
+ CQE_BYTE_16_LOCAL_QPN_S) &
+ HNS_ROCE_CQE_QPN_MASK) == qpn) {
+ /* In v1 engine, not support SRQ */
+ ++nfreed;
+ } else if (nfreed) {
+ dest = get_cqe(hr_cq, (prod_index + nfreed) &
+ hr_cq->ib_cq.cqe);
+ owner_bit = roce_get_bit(dest->cqe_byte_4,
+ CQE_BYTE_4_OWNER_S);
+ memcpy(dest, cqe, sizeof(*cqe));
+ roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S,
+ owner_bit);
+ }
+ }
+
+ if (nfreed) {
+ hr_cq->cons_index += nfreed;
+ /*
+ * Make sure update of buffer contents is done before
+ * updating consumer index.
+ */
+ wmb();
+
+ hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index,
+ &to_hr_dev(hr_cq->ib_cq.device)->cq_db_lock);
+ }
+}
+
+static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
+ struct hns_roce_srq *srq)
+{
+ spin_lock_irq(&hr_cq->lock);
+ hns_roce_v1_clean_cq(hr_cq, qpn, srq);
+ spin_unlock_irq(&hr_cq->lock);
+}
+
+static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtt *mtt,
+ enum hns_roce_qp_state cur_state,
+ enum hns_roce_qp_state new_state,
+ struct hns_roce_qp_context *context,
+ struct hns_roce_qp *hr_qp)
+{
+ static const u16
+ op[HNS_ROCE_QP_NUM_STATE][HNS_ROCE_QP_NUM_STATE] = {
+ [HNS_ROCE_QP_STATE_RST] = {
+ [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
+ [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
+ [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
+ },
+ [HNS_ROCE_QP_STATE_INIT] = {
+ [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
+ [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
+ /* Note: In v1 engine, HW doesn't support RST2INIT.
+ * We use RST2INIT cmd instead of INIT2INIT.
+ */
+ [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
+ [HNS_ROCE_QP_STATE_RTR] = HNS_ROCE_CMD_INIT2RTR_QP,
+ },
+ [HNS_ROCE_QP_STATE_RTR] = {
+ [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
+ [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
+ [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTR2RTS_QP,
+ },
+ [HNS_ROCE_QP_STATE_RTS] = {
+ [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
+ [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
+ [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTS2RTS_QP,
+ [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_RTS2SQD_QP,
+ },
+ [HNS_ROCE_QP_STATE_SQD] = {
+ [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
+ [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
+ [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_SQD2RTS_QP,
+ [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_SQD2SQD_QP,
+ },
+ [HNS_ROCE_QP_STATE_ERR] = {
+ [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
+ [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
+ }
+ };
+
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = &hr_dev->pdev->dev;
+ int ret = 0;
+
+ if (cur_state >= HNS_ROCE_QP_NUM_STATE ||
+ new_state >= HNS_ROCE_QP_NUM_STATE ||
+ !op[cur_state][new_state]) {
+ dev_err(dev, "[modify_qp]not support state %d to %d\n",
+ cur_state, new_state);
+ return -EINVAL;
+ }
+
+ if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
+ return hns_roce_cmd(hr_dev, 0, hr_qp->qpn, 2,
+ HNS_ROCE_CMD_2RST_QP,
+ HNS_ROCE_CMD_TIME_CLASS_A);
+
+ if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
+ return hns_roce_cmd(hr_dev, 0, hr_qp->qpn, 2,
+ HNS_ROCE_CMD_2ERR_QP,
+ HNS_ROCE_CMD_TIME_CLASS_A);
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ dev_err(dev, "[modify_qp]mailboc alloc failed!\n");
+ return PTR_ERR(mailbox);
+ }
+
+ memcpy(mailbox->buf, context, sizeof(*context));
+
+ ret = hns_roce_cmd(hr_dev, mailbox->dma, hr_qp->qpn, 0,
+ op[cur_state][new_state], HNS_ROCE_CMD_TIME_CLASS_C);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_sqp_context *context;
+ struct device *dev = &hr_dev->pdev->dev;
+ dma_addr_t dma_handle = 0;
+ int rq_pa_start;
+ u32 reg_val;
+ u64 *mtts;
+ u32 *addr;
+
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ /* Search QP buf's MTTs */
+ mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
+ hr_qp->mtt.first_seg, &dma_handle);
+ if (!mtts) {
+ dev_err(dev, "qp buf pa find failed\n");
+ goto out;
+ }
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ roce_set_field(context->qp1c_bytes_4,
+ QP1C_BYTES_4_SQ_WQE_SHIFT_M,
+ QP1C_BYTES_4_SQ_WQE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sq.wqe_cnt));
+ roce_set_field(context->qp1c_bytes_4,
+ QP1C_BYTES_4_RQ_WQE_SHIFT_M,
+ QP1C_BYTES_4_RQ_WQE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->rq.wqe_cnt));
+ roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
+ QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
+
+ context->sq_rq_bt_l = (u32)(dma_handle);
+ roce_set_field(context->qp1c_bytes_12,
+ QP1C_BYTES_12_SQ_RQ_BT_H_M,
+ QP1C_BYTES_12_SQ_RQ_BT_H_S,
+ ((u32)(dma_handle >> ADDR_SHIFT_32)));
+
+ roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M,
+ QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head);
+ roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M,
+ QP1C_BYTES_16_PORT_NUM_S, hr_qp->port);
+ roce_set_bit(context->qp1c_bytes_16,
+ QP1C_BYTES_16_SIGNALING_TYPE_S,
+ hr_qp->sq_signal_bits);
+ roce_set_bit(context->qp1c_bytes_16,
+ QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S,
+ hr_qp->sq_signal_bits);
+ roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
+ 1);
+ roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
+ 1);
+ roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_QP1_ERR_S,
+ 0);
+
+ roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_SQ_HEAD_M,
+ QP1C_BYTES_20_SQ_HEAD_S, hr_qp->sq.head);
+ roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M,
+ QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
+
+ rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
+ context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
+
+ roce_set_field(context->qp1c_bytes_28,
+ QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
+ QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S,
+ (mtts[rq_pa_start]) >> ADDR_SHIFT_32);
+ roce_set_field(context->qp1c_bytes_28,
+ QP1C_BYTES_28_RQ_CUR_IDX_M,
+ QP1C_BYTES_28_RQ_CUR_IDX_S, 0);
+
+ roce_set_field(context->qp1c_bytes_32,
+ QP1C_BYTES_32_RX_CQ_NUM_M,
+ QP1C_BYTES_32_RX_CQ_NUM_S,
+ to_hr_cq(ibqp->recv_cq)->cqn);
+ roce_set_field(context->qp1c_bytes_32,
+ QP1C_BYTES_32_TX_CQ_NUM_M,
+ QP1C_BYTES_32_TX_CQ_NUM_S,
+ to_hr_cq(ibqp->send_cq)->cqn);
+
+ context->cur_sq_wqe_ba_l = (u32)mtts[0];
+
+ roce_set_field(context->qp1c_bytes_40,
+ QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
+ QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S,
+ (mtts[0]) >> ADDR_SHIFT_32);
+ roce_set_field(context->qp1c_bytes_40,
+ QP1C_BYTES_40_SQ_CUR_IDX_M,
+ QP1C_BYTES_40_SQ_CUR_IDX_S, 0);
+
+ /* Copy context to QP1C register */
+ addr = (u32 *)(hr_dev->reg_base + ROCEE_QP1C_CFG0_0_REG +
+ hr_qp->port * sizeof(*context));
+
+ roce_writel(context->qp1c_bytes_4, addr);
+ roce_writel(context->sq_rq_bt_l, addr + 1);
+ roce_writel(context->qp1c_bytes_12, addr + 2);
+ roce_writel(context->qp1c_bytes_16, addr + 3);
+ roce_writel(context->qp1c_bytes_20, addr + 4);
+ roce_writel(context->cur_rq_wqe_ba_l, addr + 5);
+ roce_writel(context->qp1c_bytes_28, addr + 6);
+ roce_writel(context->qp1c_bytes_32, addr + 7);
+ roce_writel(context->cur_sq_wqe_ba_l, addr + 8);
+ }
+
+ /* Modify QP1C status */
+ addr = (u32 *)(hr_dev->reg_base + ROCEE_QP1C_CFG0_0_REG +
+ hr_qp->port * sizeof(*context));
+ reg_val = roce_readl(addr);
+ roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
+ ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
+ roce_writel(reg_val, addr);
+
+ hr_qp->state = new_state;
+ if (new_state == IB_QPS_RESET) {
+ hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
+ ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
+ if (ibqp->send_cq != ibqp->recv_cq)
+ hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
+ hr_qp->qpn, NULL);
+
+ hr_qp->rq.head = 0;
+ hr_qp->rq.tail = 0;
+ hr_qp->sq.head = 0;
+ hr_qp->sq.tail = 0;
+ hr_qp->sq_next_wqe = 0;
+ }
+
+ kfree(context);
+ return 0;
+
+out:
+ kfree(context);
+ return -EINVAL;
+}
+
+static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_qp_context *context;
+ dma_addr_t dma_handle_2 = 0;
+ dma_addr_t dma_handle = 0;
+ int rq_pa_start = 0;
+ u64 *mtts_2 = NULL;
+ int ret = -EINVAL;
+ u64 *mtts = NULL;
+ int port;
+ u8 *dmac;
+ u8 *smac;
+
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ /* Search qp buf's mtts */
+ mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
+ hr_qp->mtt.first_seg, &dma_handle);
+ if (mtts == NULL) {
+ dev_err(dev, "qp buf pa find failed\n");
+ goto out;
+ }
+
+ /* Search IRRL's mtts */
+ mtts_2 = hns_roce_table_find(&hr_dev->qp_table.irrl_table, hr_qp->qpn,
+ &dma_handle_2);
+ if (mtts_2 == NULL) {
+ dev_err(dev, "qp irrl_table find failed\n");
+ goto out;
+ }
+
+ /*
+ *Reset to init
+ * Mandatory param:
+ * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
+ * Optional param: NA
+ */
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
+ QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
+ to_hr_qp_type(hr_qp->ibqp.qp_type));
+
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
+ !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
+ !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ );
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S,
+ !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ );
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
+ QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sq.wqe_cnt));
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
+ QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->rq.wqe_cnt));
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_PD_M,
+ QP_CONTEXT_QPC_BYTES_4_PD_S,
+ to_hr_pd(ibqp->pd)->pdn);
+ hr_qp->access_flags = attr->qp_access_flags;
+ roce_set_field(context->qpc_bytes_8,
+ QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
+ QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
+ to_hr_cq(ibqp->send_cq)->cqn);
+ roce_set_field(context->qpc_bytes_8,
+ QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
+ QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
+ to_hr_cq(ibqp->recv_cq)->cqn);
+
+ if (ibqp->srq)
+ roce_set_field(context->qpc_bytes_12,
+ QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
+ QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
+ to_hr_srq(ibqp->srq)->srqn);
+
+ roce_set_field(context->qpc_bytes_12,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
+ attr->pkey_index);
+ hr_qp->pkey_index = attr->pkey_index;
+ roce_set_field(context->qpc_bytes_16,
+ QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
+ QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
+
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
+ QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
+ to_hr_qp_type(hr_qp->ibqp.qp_type));
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
+ !!(attr->qp_access_flags &
+ IB_ACCESS_REMOTE_READ));
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
+ !!(attr->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE));
+ } else {
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
+ !!(hr_qp->access_flags &
+ IB_ACCESS_REMOTE_READ));
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
+ !!(hr_qp->access_flags &
+ IB_ACCESS_REMOTE_WRITE));
+ }
+
+ roce_set_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
+ QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->sq.wqe_cnt));
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
+ QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
+ ilog2((unsigned int)hr_qp->rq.wqe_cnt));
+ roce_set_field(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTES_4_PD_M,
+ QP_CONTEXT_QPC_BYTES_4_PD_S,
+ to_hr_pd(ibqp->pd)->pdn);
+
+ roce_set_field(context->qpc_bytes_8,
+ QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
+ QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
+ to_hr_cq(ibqp->send_cq)->cqn);
+ roce_set_field(context->qpc_bytes_8,
+ QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
+ QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
+ to_hr_cq(ibqp->recv_cq)->cqn);
+
+ if (ibqp->srq)
+ roce_set_field(context->qpc_bytes_12,
+ QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
+ QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
+ to_hr_srq(ibqp->srq)->srqn);
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ roce_set_field(context->qpc_bytes_12,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
+ attr->pkey_index);
+ else
+ roce_set_field(context->qpc_bytes_12,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
+ hr_qp->pkey_index);
+
+ roce_set_field(context->qpc_bytes_16,
+ QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
+ QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ if ((attr_mask & IB_QP_ALT_PATH) ||
+ (attr_mask & IB_QP_ACCESS_FLAGS) ||
+ (attr_mask & IB_QP_PKEY_INDEX) ||
+ (attr_mask & IB_QP_QKEY)) {
+ dev_err(dev, "INIT2RTR attr_mask error\n");
+ goto out;
+ }
+
+ dmac = (u8 *)attr->ah_attr.dmac;
+
+ context->sq_rq_bt_l = (u32)(dma_handle);
+ roce_set_field(context->qpc_bytes_24,
+ QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
+ QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
+ ((u32)(dma_handle >> ADDR_SHIFT_32)));
+ roce_set_bit(context->qpc_bytes_24,
+ QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S,
+ 1);
+ roce_set_field(context->qpc_bytes_24,
+ QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
+ QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S,
+ attr->min_rnr_timer);
+ context->irrl_ba_l = (u32)(dma_handle_2);
+ roce_set_field(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M,
+ QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S,
+ ((u32)(dma_handle_2 >> 32)) &
+ QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M);
+ roce_set_field(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M,
+ QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S, 0);
+ roce_set_bit(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S,
+ 1);
+ roce_set_bit(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
+ hr_qp->sq_signal_bits);
+
+ for (port = 0; port < hr_dev->caps.num_ports; port++) {
+ smac = (u8 *)hr_dev->dev_addr[port];
+ dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
+ smac[0], smac[1], smac[2], smac[3], smac[4],
+ smac[5]);
+ if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
+ (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
+ (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
+ roce_set_bit(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
+ 1);
+ break;
+ }
+ }
+
+ if (hr_dev->loop_idc == 0x1)
+ roce_set_bit(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+
+ roce_set_bit(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
+ attr->ah_attr.ah_flags);
+ roce_set_field(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
+ QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S,
+ ilog2((unsigned int)attr->max_dest_rd_atomic));
+
+ roce_set_field(context->qpc_bytes_36,
+ QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
+ QP_CONTEXT_QPC_BYTES_36_DEST_QP_S,
+ attr->dest_qp_num);
+
+ /* Configure GID index */
+ roce_set_field(context->qpc_bytes_36,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S,
+ hns_get_gid_index(hr_dev,
+ attr->ah_attr.port_num - 1,
+ attr->ah_attr.grh.sgid_index));
+
+ memcpy(&(context->dmac_l), dmac, 4);
+
+ roce_set_field(context->qpc_bytes_44,
+ QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
+ QP_CONTEXT_QPC_BYTES_44_DMAC_H_S,
+ *((u16 *)(&dmac[4])));
+ roce_set_field(context->qpc_bytes_44,
+ QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M,
+ QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S,
+ attr->ah_attr.static_rate);
+ roce_set_field(context->qpc_bytes_44,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_S,
+ attr->ah_attr.grh.hop_limit);
+
+ roce_set_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S,
+ attr->ah_attr.grh.flow_label);
+ roce_set_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_S,
+ attr->ah_attr.grh.traffic_class);
+ roce_set_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_MTU_M,
+ QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu);
+
+ memcpy(context->dgid, attr->ah_attr.grh.dgid.raw,
+ sizeof(attr->ah_attr.grh.dgid.raw));
+
+ dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l,
+ roce_get_field(context->qpc_bytes_44,
+ QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
+ QP_CONTEXT_QPC_BYTES_44_DMAC_H_S));
+
+ roce_set_field(context->qpc_bytes_68,
+ QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M,
+ QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S, 0);
+ roce_set_field(context->qpc_bytes_68,
+ QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
+
+ rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
+ context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
+
+ roce_set_field(context->qpc_bytes_76,
+ QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
+ QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S,
+ mtts[rq_pa_start] >> ADDR_SHIFT_32);
+ roce_set_field(context->qpc_bytes_76,
+ QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M,
+ QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0);
+
+ context->rx_rnr_time = 0;
+
+ roce_set_field(context->qpc_bytes_84,
+ QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M,
+ QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S,
+ attr->rq_psn - 1);
+ roce_set_field(context->qpc_bytes_84,
+ QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M,
+ QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S, 0);
+
+ roce_set_field(context->qpc_bytes_88,
+ QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
+ QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S,
+ attr->rq_psn);
+ roce_set_bit(context->qpc_bytes_88,
+ QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S, 0);
+ roce_set_bit(context->qpc_bytes_88,
+ QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S, 0);
+ roce_set_field(context->qpc_bytes_88,
+ QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M,
+ QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S,
+ 0);
+ roce_set_field(context->qpc_bytes_88,
+ QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M,
+ QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S,
+ 0);
+
+ context->dma_length = 0;
+ context->r_key = 0;
+ context->va_l = 0;
+ context->va_h = 0;
+
+ roce_set_field(context->qpc_bytes_108,
+ QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M,
+ QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S, 0);
+ roce_set_bit(context->qpc_bytes_108,
+ QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S, 0);
+ roce_set_bit(context->qpc_bytes_108,
+ QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S, 0);
+
+ roce_set_field(context->qpc_bytes_112,
+ QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M,
+ QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S, 0);
+ roce_set_field(context->qpc_bytes_112,
+ QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M,
+ QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S, 0);
+
+ /* For chip resp ack */
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
+ QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
+ hr_qp->port);
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_SL_M,
+ QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
+ hr_qp->sl = attr->ah_attr.sl;
+ } else if (cur_state == IB_QPS_RTR &&
+ new_state == IB_QPS_RTS) {
+ /* If exist optional param, return error */
+ if ((attr_mask & IB_QP_ALT_PATH) ||
+ (attr_mask & IB_QP_ACCESS_FLAGS) ||
+ (attr_mask & IB_QP_QKEY) ||
+ (attr_mask & IB_QP_PATH_MIG_STATE) ||
+ (attr_mask & IB_QP_CUR_STATE) ||
+ (attr_mask & IB_QP_MIN_RNR_TIMER)) {
+ dev_err(dev, "RTR2RTS attr_mask error\n");
+ goto out;
+ }
+
+ context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
+
+ roce_set_field(context->qpc_bytes_120,
+ QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
+ QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S,
+ (mtts[0]) >> ADDR_SHIFT_32);
+
+ roce_set_field(context->qpc_bytes_124,
+ QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M,
+ QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S, 0);
+ roce_set_field(context->qpc_bytes_124,
+ QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M,
+ QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S, 0);
+
+ roce_set_field(context->qpc_bytes_128,
+ QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M,
+ QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S,
+ attr->sq_psn);
+ roce_set_bit(context->qpc_bytes_128,
+ QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S, 0);
+ roce_set_field(context->qpc_bytes_128,
+ QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M,
+ QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S,
+ 0);
+ roce_set_bit(context->qpc_bytes_128,
+ QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S, 0);
+
+ roce_set_field(context->qpc_bytes_132,
+ QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M,
+ QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S, 0);
+ roce_set_field(context->qpc_bytes_132,
+ QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M,
+ QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S, 0);
+
+ roce_set_field(context->qpc_bytes_136,
+ QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M,
+ QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S,
+ attr->sq_psn);
+ roce_set_field(context->qpc_bytes_136,
+ QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M,
+ QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S,
+ attr->sq_psn);
+
+ roce_set_field(context->qpc_bytes_140,
+ QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M,
+ QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S,
+ (attr->sq_psn >> SQ_PSN_SHIFT));
+ roce_set_field(context->qpc_bytes_140,
+ QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M,
+ QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S, 0);
+ roce_set_bit(context->qpc_bytes_140,
+ QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0);
+
+ roce_set_field(context->qpc_bytes_144,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_S,
+ attr->qp_state);
+
+ roce_set_field(context->qpc_bytes_148,
+ QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M,
+ QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0);
+ roce_set_field(context->qpc_bytes_148,
+ QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
+ QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S, 0);
+ roce_set_field(context->qpc_bytes_148,
+ QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M,
+ QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S, 0);
+ roce_set_field(context->qpc_bytes_148,
+ QP_CONTEXT_QPC_BYTES_148_LSN_M,
+ QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100);
+
+ context->rnr_retry = 0;
+
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M,
+ QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S,
+ attr->retry_cnt);
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
+ QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
+ attr->timeout);
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M,
+ QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S,
+ attr->rnr_retry);
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
+ QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
+ hr_qp->port);
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_SL_M,
+ QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
+ hr_qp->sl = attr->ah_attr.sl;
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
+ QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S,
+ ilog2((unsigned int)attr->max_rd_atomic));
+ roce_set_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M,
+ QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S, 0);
+ context->pkt_use_len = 0;
+
+ roce_set_field(context->qpc_bytes_164,
+ QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
+ QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S, attr->sq_psn);
+ roce_set_field(context->qpc_bytes_164,
+ QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M,
+ QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S, 0);
+
+ roce_set_field(context->qpc_bytes_168,
+ QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M,
+ QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S,
+ attr->sq_psn);
+ roce_set_field(context->qpc_bytes_168,
+ QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M,
+ QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S, 0);
+ roce_set_field(context->qpc_bytes_168,
+ QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M,
+ QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S, 0);
+ roce_set_bit(context->qpc_bytes_168,
+ QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S, 0);
+ roce_set_bit(context->qpc_bytes_168,
+ QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S, 0);
+ roce_set_bit(context->qpc_bytes_168,
+ QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S, 0);
+ context->sge_use_len = 0;
+
+ roce_set_field(context->qpc_bytes_176,
+ QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S, 0);
+ roce_set_field(context->qpc_bytes_176,
+ QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S,
+ 0);
+ roce_set_field(context->qpc_bytes_180,
+ QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S, 0);
+ roce_set_field(context->qpc_bytes_180,
+ QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
+ QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
+
+ context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
+
+ roce_set_field(context->qpc_bytes_188,
+ QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
+ QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S,
+ (mtts[0]) >> ADDR_SHIFT_32);
+ roce_set_bit(context->qpc_bytes_188,
+ QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0);
+ roce_set_field(context->qpc_bytes_188,
+ QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S,
+ 0);
+ } else if ((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
+ (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
+ (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
+ (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
+ (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
+ (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
+ (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
+ (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
+ roce_set_field(context->qpc_bytes_144,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_S,
+ attr->qp_state);
+
+ } else {
+ dev_err(dev, "not support this modify\n");
+ goto out;
+ }
+
+ /* Every status migrate must change state */
+ roce_set_field(context->qpc_bytes_144,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state);
+
+ /* SW pass context to HW */
+ ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
+ to_hns_roce_state(cur_state),
+ to_hns_roce_state(new_state), context,
+ hr_qp);
+ if (ret) {
+ dev_err(dev, "hns_roce_qp_modify failed\n");
+ goto out;
+ }
+
+ /*
+ * Use rst2init to instead of init2init with drv,
+ * need to hw to flash RQ HEAD by DB again
+ */
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
+ u32 reg_val = 0;
+ u32 *addr = NULL;
+
+ /* Memory barrier */
+ wmb();
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+ /* SW update GSI rq header */
+ addr = (u32 *)(hr_dev->reg_base +
+ ROCEE_QP1C_CFG3_0_REG +
+ QP1C_CFGN_OFFSET * hr_qp->port);
+ reg_val = roce_readl(addr);
+ roce_set_field(reg_val,
+ ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
+ ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
+ hr_qp->rq.head);
+ roce_writel(reg_val, addr);
+ } else {
+ uint32_t doorbell[2] = {0};
+ struct hns_roce_rq_db rq_db;
+
+ rq_db.u32_4 = 0;
+ rq_db.u32_8 = 0;
+
+ roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M,
+ RQ_DOORBELL_U32_4_RQ_HEAD_S,
+ hr_qp->rq.head);
+ roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M,
+ RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
+ roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M,
+ RQ_DOORBELL_U32_8_CMD_S, 1);
+ roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
+ 1);
+
+ doorbell[0] = rq_db.u32_4;
+ doorbell[1] = rq_db.u32_8;
+
+ hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
+ }
+ }
+
+ hr_qp->state = new_state;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ hr_qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT)
+ hr_qp->port = (attr->port_num - 1);
+
+ if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+ hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
+ ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
+ if (ibqp->send_cq != ibqp->recv_cq)
+ hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
+ hr_qp->qpn, NULL);
+
+ hr_qp->rq.head = 0;
+ hr_qp->rq.tail = 0;
+ hr_qp->sq.head = 0;
+ hr_qp->sq.tail = 0;
+ hr_qp->sq_next_wqe = 0;
+ }
+out:
+ kfree(context);
+ return ret;
+}
+
+int hns_roce_v1_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
+ return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state,
+ new_state);
+ else
+ return hns_roce_v1_m_qp(ibqp, attr, attr_mask, cur_state,
+ new_state);
+}
+
+static enum ib_qp_state to_ib_qp_state(enum hns_roce_qp_state state)
+{
+ switch (state) {
+ case HNS_ROCE_QP_STATE_RST:
+ return IB_QPS_RESET;
+ case HNS_ROCE_QP_STATE_INIT:
+ return IB_QPS_INIT;
+ case HNS_ROCE_QP_STATE_RTR:
+ return IB_QPS_RTR;
+ case HNS_ROCE_QP_STATE_RTS:
+ return IB_QPS_RTS;
+ case HNS_ROCE_QP_STATE_SQD:
+ return IB_QPS_SQD;
+ case HNS_ROCE_QP_STATE_ERR:
+ return IB_QPS_ERR;
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
+static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_qp_context *hr_context)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ dev_err(&hr_dev->pdev->dev, "Alloc mailbox failed\n");
+ return PTR_ERR(mailbox);
+ }
+
+ ret = hns_roce_cmd_box(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
+ HNS_ROCE_CMD_QUERY_QP,
+ HNS_ROCE_CMD_TIME_CLASS_A);
+ if (!ret)
+ memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
+ else
+ dev_err(&hr_dev->pdev->dev, "QUERY QP cmd process error\n");
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_qp_context *context;
+ int tmp_qp_state = 0;
+ int ret = 0;
+ int state;
+
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ mutex_lock(&hr_qp->mutex);
+
+ if (hr_qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
+ }
+
+ ret = hns_roce_v1_query_qpc(hr_dev, hr_qp, context);
+ if (ret) {
+ dev_err(dev, "query qpc error\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ state = roce_get_field(context->qpc_bytes_144,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_S);
+ tmp_qp_state = (int)to_ib_qp_state((enum hns_roce_qp_state)state);
+ if (tmp_qp_state == -1) {
+ dev_err(dev, "to_ib_qp_state error\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ hr_qp->state = (u8)tmp_qp_state;
+ qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
+ qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_MTU_M,
+ QP_CONTEXT_QPC_BYTES_48_MTU_S);
+ qp_attr->path_mig_state = IB_MIG_ARMED;
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD)
+ qp_attr->qkey = QKEY_VAL;
+
+ qp_attr->rq_psn = roce_get_field(context->qpc_bytes_88,
+ QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
+ QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S);
+ qp_attr->sq_psn = (u32)roce_get_field(context->qpc_bytes_164,
+ QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
+ QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S);
+ qp_attr->dest_qp_num = (u8)roce_get_field(context->qpc_bytes_36,
+ QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
+ QP_CONTEXT_QPC_BYTES_36_DEST_QP_S);
+ qp_attr->qp_access_flags = ((roce_get_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S)) << 2) |
+ ((roce_get_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S)) << 1) |
+ ((roce_get_bit(context->qpc_bytes_4,
+ QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S)) << 3);
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
+ hr_qp->ibqp.qp_type == IB_QPT_UC) {
+ qp_attr->ah_attr.sl = roce_get_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_SL_M,
+ QP_CONTEXT_QPC_BYTES_156_SL_S);
+ qp_attr->ah_attr.grh.flow_label = roce_get_field(
+ context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
+ QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
+ qp_attr->ah_attr.grh.sgid_index = roce_get_field(
+ context->qpc_bytes_36,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
+ qp_attr->ah_attr.grh.hop_limit = roce_get_field(
+ context->qpc_bytes_44,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
+ QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
+ qp_attr->ah_attr.grh.traffic_class = roce_get_field(
+ context->qpc_bytes_48,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
+ QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
+
+ memcpy(qp_attr->ah_attr.grh.dgid.raw, context->dgid,
+ sizeof(qp_attr->ah_attr.grh.dgid.raw));
+ }
+
+ qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
+ QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
+ qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
+ QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1;
+ qp_attr->sq_draining = 0;
+ qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
+ QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S);
+ qp_attr->max_dest_rd_atomic = roce_get_field(context->qpc_bytes_32,
+ QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
+ QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S);
+ qp_attr->min_rnr_timer = (u8)(roce_get_field(context->qpc_bytes_24,
+ QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
+ QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S));
+ qp_attr->timeout = (u8)(roce_get_field(context->qpc_bytes_156,
+ QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
+ QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S));
+ qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
+ QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
+ QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
+ qp_attr->rnr_retry = context->rnr_retry;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+
+ if (!ibqp->uobject) {
+ qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+ } else {
+ qp_attr->cap.max_send_wr = 0;
+ qp_attr->cap.max_send_sge = 0;
+ }
+
+ qp_init_attr->cap = qp_attr->cap;
+
+out:
+ mutex_unlock(&hr_qp->mutex);
+ kfree(context);
+ return ret;
+}
+
+static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ int is_user)
+{
+ u32 sdbinvcnt;
+ unsigned long end = 0;
+ u32 sdbinvcnt_val;
+ u32 sdbsendptr_val;
+ u32 sdbisusepr_val;
+ struct hns_roce_cq *send_cq, *recv_cq;
+ struct device *dev = &hr_dev->pdev->dev;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
+ if (hr_qp->state != IB_QPS_RESET) {
+ /*
+ * Set qp to ERR,
+ * waiting for hw complete processing all dbs
+ */
+ if (hns_roce_v1_qp_modify(hr_dev, NULL,
+ to_hns_roce_state(
+ (enum ib_qp_state)hr_qp->state),
+ HNS_ROCE_QP_STATE_ERR, NULL,
+ hr_qp))
+ dev_err(dev, "modify QP %06x to ERR failed.\n",
+ hr_qp->qpn);
+
+ /* Record issued doorbell */
+ sdbisusepr_val = roce_readl(hr_dev->reg_base +
+ ROCEE_SDB_ISSUE_PTR_REG);
+ /*
+ * Query db process status,
+ * until hw process completely
+ */
+ end = msecs_to_jiffies(
+ HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies;
+ do {
+ sdbsendptr_val = roce_readl(hr_dev->reg_base +
+ ROCEE_SDB_SEND_PTR_REG);
+ if (!time_before(jiffies, end)) {
+ dev_err(dev, "destroy qp(0x%x) timeout!!!",
+ hr_qp->qpn);
+ break;
+ }
+ } while ((short)(roce_get_field(sdbsendptr_val,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) -
+ roce_get_field(sdbisusepr_val,
+ ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
+ ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
+ ) < 0);
+
+ /* Get list pointer */
+ sdbinvcnt = roce_readl(hr_dev->reg_base +
+ ROCEE_SDB_INV_CNT_REG);
+
+ /* Query db's list status, until hw reversal */
+ do {
+ sdbinvcnt_val = roce_readl(hr_dev->reg_base +
+ ROCEE_SDB_INV_CNT_REG);
+ if (!time_before(jiffies, end)) {
+ dev_err(dev, "destroy qp(0x%x) timeout!!!",
+ hr_qp->qpn);
+ dev_err(dev, "SdbInvCnt = 0x%x\n",
+ sdbinvcnt_val);
+ break;
+ }
+ } while ((short)(roce_get_field(sdbinvcnt_val,
+ ROCEE_SDB_INV_CNT_SDB_INV_CNT_M,
+ ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) -
+ (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0);
+
+ /* Modify qp to reset before destroying qp */
+ if (hns_roce_v1_qp_modify(hr_dev, NULL,
+ to_hns_roce_state(
+ (enum ib_qp_state)hr_qp->state),
+ HNS_ROCE_QP_STATE_RST, NULL, hr_qp))
+ dev_err(dev, "modify QP %06x to RESET failed.\n",
+ hr_qp->qpn);
+ }
+ }
+
+ send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
+ recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+
+ hns_roce_lock_cqs(send_cq, recv_cq);
+
+ if (!is_user) {
+ hns_roce_v1_clean_cq(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
+ to_hr_srq(hr_qp->ibqp.srq) : NULL);
+ if (send_cq != recv_cq)
+ hns_roce_v1_clean_cq(send_cq, hr_qp->qpn, NULL);
+ }
+
+ hns_roce_qp_remove(hr_dev, hr_qp);
+
+ hns_roce_unlock_cqs(send_cq, recv_cq);
+
+ hns_roce_qp_free(hr_dev, hr_qp);
+
+ /* Not special_QP, free their QPN */
+ if ((hr_qp->ibqp.qp_type == IB_QPT_RC) ||
+ (hr_qp->ibqp.qp_type == IB_QPT_UC) ||
+ (hr_qp->ibqp.qp_type == IB_QPT_UD))
+ hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+
+ hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+
+ if (is_user) {
+ ib_umem_release(hr_qp->umem);
+ } else {
+ kfree(hr_qp->sq.wrid);
+ kfree(hr_qp->rq.wrid);
+ hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+ }
+}
+
+int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject);
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ kfree(hr_to_hr_sqp(hr_qp));
+ else
+ kfree(hr_qp);
+
+ return 0;
+}
+
struct hns_roce_hw hns_roce_hw_v1 = {
.reset = hns_roce_v1_reset,
.hw_profile = hns_roce_v1_profile,
@@ -670,4 +2307,9 @@ struct hns_roce_hw hns_roce_hw_v1 = {
.set_gid = hns_roce_v1_set_gid,
.set_mac = hns_roce_v1_set_mac,
.set_mtu = hns_roce_v1_set_mtu,
+ .modify_qp = hns_roce_v1_modify_qp,
+ .query_qp = hns_roce_v1_query_qp,
+ .destroy_qp = hns_roce_v1_destroy_qp,
+ .post_send = hns_roce_v1_post_send,
+ .post_recv = hns_roce_v1_post_recv,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 143ef0b..917985c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -84,10 +84,636 @@
#define HNS_ROCE_ODB_EXTEND_MODE 1
+#define HNS_ROCE_CQE_QPN_MASK 0x3ffff
+
+#define QP1C_CFGN_OFFSET 0x28
#define PHY_PORT_OFFSET 0x8
#define ALL_PORT_VAL_OPEN 0x3f
#define POL_TIME_INTERVAL_VAL 0x80
#define SLEEP_TIME_INTERVAL 20
+#define SQ_PSN_SHIFT 8
+#define QKEY_VAL 0x80010000
+#define SDB_INV_CNT_OFFSET 8
+
+struct hns_roce_cqe {
+ u32 cqe_byte_4;
+ u32 cqe_byte_16;
+};
+
+#define CQE_BYTE_4_OWNER_S 7
+#define CQE_BYTE_4_SQ_RQ_FLAG_S 14
+
+#define CQE_BYTE_16_LOCAL_QPN_S 0
+#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S)
+
+struct hns_roce_wqe_ctrl_seg {
+ __be32 sgl_pa_h;
+ __be32 flag;
+ __be32 imm_data;
+ __be32 msg_length;
+};
+
+struct hns_roce_wqe_data_seg {
+ __be64 addr;
+ __be32 lkey;
+ __be32 len;
+};
+
+struct hns_roce_wqe_raddr_seg {
+ __be32 rkey;
+ __be32 len;/* reserved */
+ __be64 raddr;
+};
+
+struct hns_roce_rq_wqe_ctrl {
+
+ u32 rwqe_byte_4;
+ u32 rocee_sgl_ba_l;
+ u32 rwqe_byte_12;
+ u32 reserved[5];
+};
+
+#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S 16
+#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M \
+ (((1UL << 6) - 1) << RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S)
+
+#define HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS 10000
+
+#define GID_LEN 16
+
+struct hns_roce_ud_send_wqe {
+ u32 dmac_h;
+ u32 u32_8;
+ u32 immediate_data;
+
+ u32 u32_16;
+ union {
+ unsigned char dgid[GID_LEN];
+ struct {
+ u32 u32_20;
+ u32 u32_24;
+ u32 u32_28;
+ u32 u32_32;
+ };
+ };
+
+ u32 u32_36;
+ u32 u32_40;
+
+ u32 va0_l;
+ u32 va0_h;
+ u32 l_key0;
+
+ u32 va1_l;
+ u32 va1_h;
+ u32 l_key1;
+};
+
+#define UD_SEND_WQE_U32_4_DMAC_0_S 0
+#define UD_SEND_WQE_U32_4_DMAC_0_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_0_S)
+
+#define UD_SEND_WQE_U32_4_DMAC_1_S 8
+#define UD_SEND_WQE_U32_4_DMAC_1_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_1_S)
+
+#define UD_SEND_WQE_U32_4_DMAC_2_S 16
+#define UD_SEND_WQE_U32_4_DMAC_2_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_2_S)
+
+#define UD_SEND_WQE_U32_4_DMAC_3_S 24
+#define UD_SEND_WQE_U32_4_DMAC_3_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_3_S)
+
+#define UD_SEND_WQE_U32_8_DMAC_4_S 0
+#define UD_SEND_WQE_U32_8_DMAC_4_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_4_S)
+
+#define UD_SEND_WQE_U32_8_DMAC_5_S 8
+#define UD_SEND_WQE_U32_8_DMAC_5_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
+
+#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
+#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \
+ (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
+
+#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S 24
+#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M \
+ (((1UL << 6) - 1) << UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S)
+
+#define UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S 31
+
+#define UD_SEND_WQE_U32_16_DEST_QP_S 0
+#define UD_SEND_WQE_U32_16_DEST_QP_M \
+ (((1UL << 24) - 1) << UD_SEND_WQE_U32_16_DEST_QP_S)
+
+#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S 24
+#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S)
+
+#define UD_SEND_WQE_U32_36_FLOW_LABEL_S 0
+#define UD_SEND_WQE_U32_36_FLOW_LABEL_M \
+ (((1UL << 20) - 1) << UD_SEND_WQE_U32_36_FLOW_LABEL_S)
+
+#define UD_SEND_WQE_U32_36_PRIORITY_S 20
+#define UD_SEND_WQE_U32_36_PRIORITY_M \
+ (((1UL << 4) - 1) << UD_SEND_WQE_U32_36_PRIORITY_S)
+
+#define UD_SEND_WQE_U32_36_SGID_INDEX_S 24
+#define UD_SEND_WQE_U32_36_SGID_INDEX_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_36_SGID_INDEX_S)
+
+#define UD_SEND_WQE_U32_40_HOP_LIMIT_S 0
+#define UD_SEND_WQE_U32_40_HOP_LIMIT_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_HOP_LIMIT_S)
+
+#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S 8
+#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M \
+ (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S)
+
+struct hns_roce_sqp_context {
+ u32 qp1c_bytes_4;
+ u32 sq_rq_bt_l;
+ u32 qp1c_bytes_12;
+ u32 qp1c_bytes_16;
+ u32 qp1c_bytes_20;
+ u32 qp1c_bytes_28;
+ u32 cur_rq_wqe_ba_l;
+ u32 qp1c_bytes_32;
+ u32 cur_sq_wqe_ba_l;
+ u32 qp1c_bytes_40;
+};
+
+#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
+#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \
+ (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
+
+#define QP1C_BYTES_4_RQ_WQE_SHIFT_S 12
+#define QP1C_BYTES_4_RQ_WQE_SHIFT_M \
+ (((1UL << 4) - 1) << QP1C_BYTES_4_RQ_WQE_SHIFT_S)
+
+#define QP1C_BYTES_4_PD_S 16
+#define QP1C_BYTES_4_PD_M (((1UL << 16) - 1) << QP1C_BYTES_4_PD_S)
+
+#define QP1C_BYTES_12_SQ_RQ_BT_H_S 0
+#define QP1C_BYTES_12_SQ_RQ_BT_H_M \
+ (((1UL << 17) - 1) << QP1C_BYTES_12_SQ_RQ_BT_H_S)
+
+#define QP1C_BYTES_16_RQ_HEAD_S 0
+#define QP1C_BYTES_16_RQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_16_RQ_HEAD_S)
+
+#define QP1C_BYTES_16_PORT_NUM_S 16
+#define QP1C_BYTES_16_PORT_NUM_M \
+ (((1UL << 3) - 1) << QP1C_BYTES_16_PORT_NUM_S)
+
+#define QP1C_BYTES_16_SIGNALING_TYPE_S 27
+#define QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S 28
+#define QP1C_BYTES_16_RQ_BA_FLG_S 29
+#define QP1C_BYTES_16_SQ_BA_FLG_S 30
+#define QP1C_BYTES_16_QP1_ERR_S 31
+
+#define QP1C_BYTES_20_SQ_HEAD_S 0
+#define QP1C_BYTES_20_SQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_20_SQ_HEAD_S)
+
+#define QP1C_BYTES_20_PKEY_IDX_S 16
+#define QP1C_BYTES_20_PKEY_IDX_M \
+ (((1UL << 16) - 1) << QP1C_BYTES_20_PKEY_IDX_S)
+
+#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S 0
+#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M \
+ (((1UL << 5) - 1) << QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S)
+
+#define QP1C_BYTES_28_RQ_CUR_IDX_S 16
+#define QP1C_BYTES_28_RQ_CUR_IDX_M \
+ (((1UL << 15) - 1) << QP1C_BYTES_28_RQ_CUR_IDX_S)
+
+#define QP1C_BYTES_32_TX_CQ_NUM_S 0
+#define QP1C_BYTES_32_TX_CQ_NUM_M \
+ (((1UL << 16) - 1) << QP1C_BYTES_32_TX_CQ_NUM_S)
+
+#define QP1C_BYTES_32_RX_CQ_NUM_S 16
+#define QP1C_BYTES_32_RX_CQ_NUM_M \
+ (((1UL << 16) - 1) << QP1C_BYTES_32_RX_CQ_NUM_S)
+
+#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S 0
+#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M \
+ (((1UL << 5) - 1) << QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S)
+
+#define QP1C_BYTES_40_SQ_CUR_IDX_S 16
+#define QP1C_BYTES_40_SQ_CUR_IDX_M \
+ (((1UL << 15) - 1) << QP1C_BYTES_40_SQ_CUR_IDX_S)
+
+#define HNS_ROCE_WQE_INLINE (1UL<<31)
+#define HNS_ROCE_WQE_SE (1UL<<30)
+
+#define HNS_ROCE_WQE_SGE_NUM_BIT 24
+#define HNS_ROCE_WQE_IMM (1UL<<23)
+#define HNS_ROCE_WQE_FENCE (1UL<<21)
+#define HNS_ROCE_WQE_CQ_NOTIFY (1UL<<20)
+
+#define HNS_ROCE_WQE_OPCODE_SEND (0<<16)
+#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16)
+#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16)
+#define HNS_ROCE_WQE_OPCODE_MASK (15<<16)
+
+struct hns_roce_qp_context {
+ u32 qpc_bytes_4;
+ u32 qpc_bytes_8;
+ u32 qpc_bytes_12;
+ u32 qpc_bytes_16;
+ u32 sq_rq_bt_l;
+ u32 qpc_bytes_24;
+ u32 irrl_ba_l;
+ u32 qpc_bytes_32;
+ u32 qpc_bytes_36;
+ u32 dmac_l;
+ u32 qpc_bytes_44;
+ u32 qpc_bytes_48;
+ u8 dgid[16];
+ u32 qpc_bytes_68;
+ u32 cur_rq_wqe_ba_l;
+ u32 qpc_bytes_76;
+ u32 rx_rnr_time;
+ u32 qpc_bytes_84;
+ u32 qpc_bytes_88;
+ union {
+ u32 rx_sge_len;
+ u32 dma_length;
+ };
+ union {
+ u32 rx_sge_num;
+ u32 rx_send_pktn;
+ u32 r_key;
+ };
+ u32 va_l;
+ u32 va_h;
+ u32 qpc_bytes_108;
+ u32 qpc_bytes_112;
+ u32 rx_cur_sq_wqe_ba_l;
+ u32 qpc_bytes_120;
+ u32 qpc_bytes_124;
+ u32 qpc_bytes_128;
+ u32 qpc_bytes_132;
+ u32 qpc_bytes_136;
+ u32 qpc_bytes_140;
+ u32 qpc_bytes_144;
+ u32 qpc_bytes_148;
+ union {
+ u32 rnr_retry;
+ u32 ack_time;
+ };
+ u32 qpc_bytes_156;
+ u32 pkt_use_len;
+ u32 qpc_bytes_164;
+ u32 qpc_bytes_168;
+ union {
+ u32 sge_use_len;
+ u32 pa_use_len;
+ };
+ u32 qpc_bytes_176;
+ u32 qpc_bytes_180;
+ u32 tx_cur_sq_wqe_ba_l;
+ u32 qpc_bytes_188;
+ u32 rvd21;
+};
+
+#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S 0
+#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M \
+ (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S)
+
+#define QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S 3
+#define QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S 4
+#define QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S 5
+#define QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S 6
+#define QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S 7
+
+#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S 8
+#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M \
+ (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S)
+
+#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S 12
+#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M \
+ (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S)
+
+#define QP_CONTEXT_QPC_BYTES_4_PD_S 16
+#define QP_CONTEXT_QPC_BYTES_4_PD_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_4_PD_S)
+
+#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S 0
+#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S)
+
+#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S 16
+#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S)
+
+#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S 0
+#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S)
+
+#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S 16
+#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S)
+
+#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_S 0
+#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_16_QP_NUM_S)
+
+#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S 0
+#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M \
+ (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S)
+
+#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S 18
+#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M \
+ (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S)
+
+#define QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S 23
+
+#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S 0
+#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M \
+ (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S)
+
+#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S 18
+#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S)
+
+#define QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S 20
+#define QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S 21
+#define QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S 22
+#define QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S 23
+
+#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S 24
+#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S)
+
+#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_S 0
+#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_36_DEST_QP_S)
+
+#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S 24
+#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S)
+
+#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_S 0
+#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_44_DMAC_H_S)
+
+#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S 16
+#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S)
+
+#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_S 24
+#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_HOPLMT_S)
+
+#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S 0
+#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M \
+ (((1UL << 20) - 1) << QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S)
+
+#define QP_CONTEXT_QPC_BYTES_48_TCLASS_S 20
+#define QP_CONTEXT_QPC_BYTES_48_TCLASS_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_48_TCLASS_S)
+
+#define QP_CONTEXT_QPC_BYTES_48_MTU_S 28
+#define QP_CONTEXT_QPC_BYTES_48_MTU_M \
+ (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_48_MTU_S)
+
+#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S 0
+#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S)
+
+#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S 16
+#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S)
+
+#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S 0
+#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M \
+ (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S)
+
+#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S 8
+#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S 0
+#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S 24
+#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S)
+
+#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S 0
+#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S 24
+#define QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S 25
+
+#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S 26
+#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M \
+ (((1UL << 2) - 1) << \
+ QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S)
+
+#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S 29
+#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S)
+
+#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S 0
+#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S 24
+#define QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S 25
+
+#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S 0
+#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S 24
+#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S)
+
+#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S 0
+#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M \
+ (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S)
+
+#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S 0
+#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S 16
+#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S)
+
+#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S 0
+#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S 24
+
+#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S 25
+#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S)
+
+#define QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S 27
+
+#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S 0
+#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S 24
+#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S)
+
+#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S 0
+#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S 24
+#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S)
+
+#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S 0
+#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S)
+
+#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S 16
+#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S 31
+
+#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_S 0
+#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_M \
+ (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_144_QP_STATE_S)
+
+#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S 0
+#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S)
+
+#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S 2
+#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M \
+ (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S)
+
+#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S 5
+#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M \
+ (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S)
+
+#define QP_CONTEXT_QPC_BYTES_148_LSN_S 8
+#define QP_CONTEXT_QPC_BYTES_148_LSN_M \
+ (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_148_LSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S 0
+#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M \
+ (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S)
+
+#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S 3
+#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M \
+ (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S)
+
+#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S 8
+#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M \
+ (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S)
+
+#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S 11
+#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M \
+ (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S)
+
+#define QP_CONTEXT_QPC_BYTES_156_SL_S 14
+#define QP_CONTEXT_QPC_BYTES_156_SL_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_SL_S)
+
+#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S 16
+#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S)
+
+#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S 24
+#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S)
+
+#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S 0
+#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S 24
+#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M \
+ (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S)
+
+#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S 0
+#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M \
+ (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S)
+
+#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S 24
+#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S)
+
+#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S 26
+#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M \
+ (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S)
+
+#define QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S 28
+#define QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S 29
+#define QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S 30
+
+#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S 0
+#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S)
+
+#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S 16
+#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S)
+
+#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S 0
+#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S)
+
+#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S 16
+#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S)
+
+#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S 0
+#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M \
+ (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S)
+
+#define QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S 8
+
+#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S 16
+#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \
+ (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S)
+
+struct hns_roce_rq_db {
+ u32 u32_4;
+ u32 u32_8;
+};
+
+#define RQ_DOORBELL_U32_4_RQ_HEAD_S 0
+#define RQ_DOORBELL_U32_4_RQ_HEAD_M \
+ (((1UL << 15) - 1) << RQ_DOORBELL_U32_4_RQ_HEAD_S)
+
+#define RQ_DOORBELL_U32_8_QPN_S 0
+#define RQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DOORBELL_U32_8_QPN_S)
+
+#define RQ_DOORBELL_U32_8_CMD_S 28
+#define RQ_DOORBELL_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DOORBELL_U32_8_CMD_S)
+
+#define RQ_DOORBELL_U32_8_HW_SYNC_S 31
+
+struct hns_roce_sq_db {
+ u32 u32_4;
+ u32 u32_8;
+};
+
+#define SQ_DOORBELL_U32_4_SQ_HEAD_S 0
+#define SQ_DOORBELL_U32_4_SQ_HEAD_M \
+ (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S)
+
+#define SQ_DOORBELL_U32_4_PORT_S 18
+#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S)
+
+#define SQ_DOORBELL_U32_8_QPN_S 0
+#define SQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DOORBELL_U32_8_QPN_S)
+
+#define SQ_DOORBELL_HW_SYNC_S 31
struct hns_roce_ext_db {
int esdb_almept;
diff --git a/drivers/infiniband/hw/hns/hns_roce_icm.c b/drivers/infiniband/hw/hns/hns_roce_icm.c
index cae6ec8..4ac06be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_icm.c
+++ b/drivers/infiniband/hw/hns/hns_roce_icm.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
#include "hns_roce_icm.h"
#include "hns_roce_common.h"
@@ -402,6 +403,49 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
mutex_unlock(&table->mutex);
}
+void *hns_roce_table_find(struct hns_roce_icm_table *table, int obj,
+ dma_addr_t *dma_handle)
+{
+ struct hns_roce_icm_chunk *chunk;
+ int idx, i;
+ int offset, dma_offset;
+ struct hns_roce_icm *icm;
+ struct page *page = NULL;
+
+ if (!table->lowmem)
+ return NULL;
+
+ mutex_lock(&table->mutex);
+ idx = (obj & (table->num_obj - 1)) * table->obj_size;
+ icm = table->icm[idx / HNS_ROCE_TABLE_CHUNK_SIZE];
+ dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE;
+
+ if (!icm)
+ goto out;
+
+ list_for_each_entry(chunk, &icm->chunk_list, list) {
+ for (i = 0; i < chunk->npages; ++i) {
+ if (dma_handle && dma_offset >= 0) {
+ if (sg_dma_len(&chunk->mem[i]) >
+ (u32)dma_offset)
+ *dma_handle = sg_dma_address(
+ &chunk->mem[i]) + dma_offset;
+ dma_offset -= sg_dma_len(&chunk->mem[i]);
+ }
+
+ if (chunk->mem[i].length > (u32)offset) {
+ page = sg_page(&chunk->mem[i]);
+ goto out;
+ }
+ offset -= chunk->mem[i].length;
+ }
+ }
+
+out:
+ mutex_unlock(&table->mutex);
+ return page ? lowmem_page_address(page) + offset : NULL;
+}
+
int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
struct hns_roce_icm_table *table, int start,
int end)
@@ -426,6 +470,17 @@ fail:
return ret;
}
+void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
+ struct hns_roce_icm_table *table, int start,
+ int end)
+{
+ int i;
+
+ for (i = start; i <= end;
+ i += HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size)
+ hns_roce_table_put(hr_dev, table, i);
+}
+
int hns_roce_init_icm_table(struct hns_roce_dev *hr_dev,
struct hns_roce_icm_table *table, u32 type,
int obj_size, int nobj, int reserved,
diff --git a/drivers/infiniband/hw/hns/hns_roce_icm.h b/drivers/infiniband/hw/hns/hns_roce_icm.h
index 78f8e90..f4f4782 100644
--- a/drivers/infiniband/hw/hns/hns_roce_icm.h
+++ b/drivers/infiniband/hw/hns/hns_roce_icm.h
@@ -30,6 +30,10 @@ enum {
((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
(sizeof(struct scatterlist)))
+enum {
+ HNS_ROCE_ICM_PAGE_SHIFT = 12,
+};
+
struct hns_roce_icm_chunk {
struct list_head list;
int npages;
@@ -54,11 +58,14 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
struct hns_roce_icm_table *table, int obj);
void hns_roce_table_put(struct hns_roce_dev *hr_dev,
struct hns_roce_icm_table *table, int obj);
-
+void *hns_roce_table_find(struct hns_roce_icm_table *table, int obj,
+ dma_addr_t *dma_handle);
int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
struct hns_roce_icm_table *table, int start,
int end);
-
+void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
+ struct hns_roce_icm_table *table, int start,
+ int end);
int hns_roce_init_icm_table(struct hns_roce_dev *hr_dev,
struct hns_roce_icm_table *table, u32 type,
int obj_size, int nobj, int reserved,
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index b9b88f7..d2f8da5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -587,7 +587,11 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
(1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ULL << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD);
+ (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ULL << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
/* HCA||device||port */
ib_dev->modify_device = hns_roce_modify_device;
@@ -610,6 +614,14 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->query_ah = hns_roce_query_ah;
ib_dev->destroy_ah = hns_roce_destroy_ah;
+ /* QP */
+ ib_dev->create_qp = hns_roce_create_qp;
+ ib_dev->modify_qp = hns_roce_modify_qp;
+ ib_dev->query_qp = hr_dev->hw->query_qp;
+ ib_dev->destroy_qp = hr_dev->hw->destroy_qp;
+ ib_dev->post_send = hr_dev->hw->post_send;
+ ib_dev->post_recv = hr_dev->hw->post_recv;
+
ret = ib_register_device(ib_dev, NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 1adcdda..edf0155 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/platform_device.h>
#include "hns_roce_device.h"
+#include "hns_roce_icm.h"
static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
u32 *seg)
@@ -137,6 +138,127 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
return 0;
}
+int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
+ struct hns_roce_mtt *mtt)
+{
+ int ret = 0;
+ int i;
+
+ /* Page num is zero, correspond to DMA memory register */
+ if (!npages) {
+ mtt->order = -1;
+ mtt->page_shift = HNS_ROCE_ICM_PAGE_SHIFT;
+ return 0;
+ }
+
+ /* Note: if page_shift is zero, FAST memory regsiter */
+ mtt->page_shift = page_shift;
+
+ /* Compute MTT entry necessary */
+ for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
+ i <<= 1)
+ ++mtt->order;
+
+ /* Allocate MTT entry */
+ ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg);
+ if (ret == -1)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
+{
+ struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+
+ if (mtt->order < 0)
+ return;
+
+ hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
+ hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+}
+
+static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtt *mtt, u32 start_index,
+ u32 npages, u64 *page_list)
+{
+ u32 i = 0;
+ __le64 *mtts = NULL;
+ dma_addr_t dma_handle;
+ u32 s = start_index * sizeof(u64);
+
+ /* All MTTs must fit in the same page */
+ if (start_index / (PAGE_SIZE / sizeof(u64)) !=
+ (start_index + npages - 1) / (PAGE_SIZE / sizeof(u64)))
+ return -EINVAL;
+
+ if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
+ return -EINVAL;
+
+ mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
+ mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
+ &dma_handle);
+ if (!mtts)
+ return -ENOMEM;
+
+ /* Save page addr, low 12 bits : 0 */
+ for (i = 0; i < npages; ++i)
+ mtts[i] = (cpu_to_le64(page_list[i])) >> PAGE_ADDR_SHIFT;
+
+ return 0;
+}
+
+int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt,
+ u32 start_index, u32 npages, u64 *page_list)
+{
+ int chunk;
+ int ret;
+
+ if (mtt->order < 0)
+ return -EINVAL;
+
+ while (npages > 0) {
+ chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
+
+ ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
+ page_list);
+ if (ret)
+ return ret;
+
+ npages -= chunk;
+ start_index += chunk;
+ page_list += chunk;
+ }
+
+ return 0;
+}
+
+int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
+{
+ u32 i = 0;
+ int ret = 0;
+ u64 *page_list = NULL;
+
+ page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < buf->npages; ++i) {
+ if (buf->nbufs == 1)
+ page_list[i] = buf->direct.map + (i << buf->page_shift);
+ else
+ page_list[i] = buf->page_list[i].map;
+
+ }
+ ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
+
+ kfree(page_list);
+
+ return ret;
+}
+
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
@@ -185,3 +307,41 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
}
+int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtt *mtt, struct ib_umem *umem)
+{
+ struct scatterlist *sg;
+ int i, k, entry;
+ int ret = 0;
+ u64 *pages;
+ u32 n;
+ int len;
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ i = n = 0;
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> mtt->page_shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) + umem->page_size * k;
+ if (i == PAGE_SIZE / sizeof(u64)) {
+ ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
+ pages);
+ if (ret)
+ goto out;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
+
+out:
+ free_page((unsigned long) pages);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e8d396b..7401c1a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -11,8 +11,12 @@
#include <linux/slab.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
+#include "hns_roce_common.h"
#include "hns_roce_device.h"
+#include "hns_roce_icm.h"
+#include "hns_roce_user.h"
+#define DB_REG_OFFSET 0x1000
#define SQP_NUM 12
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
@@ -40,6 +44,767 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
complete(&qp->free);
}
+static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
+ enum hns_roce_event type)
+{
+ struct ib_event event;
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+
+ if (ibqp->event_handler) {
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ dev_dbg(ibqp->device->dma_device, "roce_ib: Unexpected event type %d on QP %06x\n",
+ type, hr_qp->qpn);
+ return;
+ }
+ ibqp->event_handler(&event, ibqp->qp_context);
+ }
+}
+
+int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, int align,
+ int *base)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ int ret = 0;
+ u32 qpn;
+
+ ret = hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, &qpn);
+ if (ret == -1)
+ return -ENOMEM;
+
+ *base = qpn;
+
+ return 0;
+}
+
+enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return HNS_ROCE_QP_STATE_RST;
+ case IB_QPS_INIT:
+ return HNS_ROCE_QP_STATE_INIT;
+ case IB_QPS_RTR:
+ return HNS_ROCE_QP_STATE_RTR;
+ case IB_QPS_RTS:
+ return HNS_ROCE_QP_STATE_RTS;
+ case IB_QPS_SQD:
+ return HNS_ROCE_QP_STATE_SQD;
+ case IB_QPS_ERR:
+ return HNS_ROCE_QP_STATE_ERR;
+ default:
+ return HNS_ROCE_QP_NUM_STATE;
+ }
+}
+
+int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, int qpn,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ int ret;
+
+ if (!qpn)
+ return -EINVAL;
+
+ hr_qp->qpn = qpn;
+
+ spin_lock_irq(&qp_table->lock);
+ ret = radix_tree_insert(&hr_dev->qp_table_tree,
+ hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp);
+ spin_unlock_irq(&qp_table->lock);
+ if (ret) {
+ dev_err(&hr_dev->pdev->dev, "QPC radix_tree_insert failed\n");
+ goto err_put_irrl;
+ }
+
+ atomic_set(&hr_qp->refcount, 1);
+ init_completion(&hr_qp->free);
+
+ return 0;
+
+err_put_irrl:
+
+ return ret;
+}
+
+int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, int qpn,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ struct device *dev = &hr_dev->pdev->dev;
+ int ret;
+
+ if (!qpn)
+ return -EINVAL;
+
+ hr_qp->qpn = qpn;
+
+ /* Alloc memory for QPC */
+ ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "QPC table get failed\n");
+ goto err_out;
+ }
+
+ /* Alloc memory for IRRL */
+ ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "IRRL table get failed\n");
+ goto err_put_qp;
+ }
+
+ spin_lock_irq(&qp_table->lock);
+ ret = radix_tree_insert(&hr_dev->qp_table_tree,
+ hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp);
+ spin_unlock_irq(&qp_table->lock);
+ if (ret) {
+ dev_err(dev, "QPC radix_tree_insert failed\n");
+ goto err_put_irrl;
+ }
+
+ atomic_set(&hr_qp->refcount, 1);
+ init_completion(&hr_qp->free);
+
+ return 0;
+
+err_put_irrl:
+ hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+
+err_put_qp:
+ hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
+
+err_out:
+ return ret;
+}
+
+void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp_table->lock, flags);
+ radix_tree_delete(&hr_dev->qp_table_tree,
+ hr_qp->qpn & (hr_dev->caps.num_qps - 1));
+ spin_unlock_irqrestore(&qp_table->lock, flags);
+}
+
+void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+
+ if (atomic_dec_and_test(&hr_qp->refcount))
+ complete(&hr_qp->free);
+ wait_for_completion(&hr_qp->free);
+
+ if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
+ hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+ hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
+ }
+}
+
+void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
+ int cnt)
+{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+
+ if (base_qpn < (hr_dev->caps.sqp_start + 2 * hr_dev->caps.num_ports))
+ return;
+
+ hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+}
+
+int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
+ int is_user, int has_srq, struct hns_roce_qp *hr_qp)
+{
+ u32 max_cnt;
+ struct device *dev = &hr_dev->pdev->dev;
+
+ /* Check the validity of QP support capacity */
+ if (cap->max_recv_wr > hr_dev->caps.max_wqes ||
+ cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
+ dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n",
+ cap->max_recv_wr, cap->max_recv_sge);
+ return -EINVAL;
+ }
+
+ /* If srq exit, set zero for relative number of rq */
+ if (has_srq) {
+ if (cap->max_recv_wr) {
+ dev_dbg(dev, "srq no need config max_recv_wr\n");
+ return -EINVAL;
+ }
+
+ hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0;
+ } else {
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
+ dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
+ return -EINVAL;
+ }
+
+ /* In v1 engine, parameter verification procession */
+ max_cnt = cap->max_recv_wr > HNS_ROCE_MIN_WQE_NUM ?
+ cap->max_recv_wr : HNS_ROCE_MIN_WQE_NUM;
+ hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
+
+ if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
+ dev_err(dev, "hns_roce_set_rq_size rq.wqe_cnt too large\n");
+ return -EINVAL;
+ }
+
+ max_cnt = max(1U, cap->max_recv_sge);
+ hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
+ /* WQE is fixed for 64B */
+ hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
+ }
+
+ cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt;
+ cap->max_recv_sge = hr_qp->rq.max_gs;
+
+ return 0;
+}
+
+int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
+ u8 max_sq_stride = ilog2(roundup_sq_stride);
+
+ /* Sanity check SQ size before proceeding */
+ if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes ||
+ ucmd->log_sq_stride > max_sq_stride ||
+ ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
+ dev_err(&hr_dev->pdev->dev, "check SQ size error!\n");
+ return -EINVAL;
+ }
+
+ hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
+ hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
+
+ /* Get buf size, SQ and RQ are aligned to page_szie */
+ hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
+ hr_qp->rq.wqe_shift), PAGE_SIZE) +
+ HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+ hr_qp->sq.wqe_shift), PAGE_SIZE);
+
+ hr_qp->sq.offset = 0;
+ hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+ hr_qp->sq.wqe_shift), PAGE_SIZE);
+
+ return 0;
+}
+
+int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap, enum ib_qp_type type,
+ struct hns_roce_qp *hr_qp)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 max_cnt;
+ (void)type;
+
+ if (cap->max_send_wr > hr_dev->caps.max_wqes ||
+ cap->max_send_sge > hr_dev->caps.max_sq_sg ||
+ cap->max_inline_data > hr_dev->caps.max_sq_inline) {
+ dev_err(dev, "hns_roce_set_kernel_sq_size error1\n");
+ return -EINVAL;
+ }
+
+ hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
+ hr_qp->sq_max_wqes_per_wr = 1;
+ hr_qp->sq_spare_wqes = 0;
+
+ /* In v1 engine, parameter verification procession */
+ max_cnt = cap->max_send_wr > HNS_ROCE_MIN_WQE_NUM ?
+ cap->max_send_wr : HNS_ROCE_MIN_WQE_NUM;
+ hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt);
+ if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
+ dev_err(dev, "hns_roce_set_kernel_sq_size sq.wqe_cnt too large\n");
+ return -EINVAL;
+ }
+
+ /* Get data_seg numbers */
+ max_cnt = max(1U, cap->max_send_sge);
+ hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
+
+ /* Get buf size, SQ and RQ are aligned to page_szie */
+ hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
+ hr_qp->rq.wqe_shift), PAGE_SIZE) +
+ HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+ hr_qp->sq.wqe_shift), PAGE_SIZE);
+ hr_qp->sq.offset = 0;
+ hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
+ hr_qp->sq.wqe_shift), PAGE_SIZE);
+
+ /* Get wr and sge number which send */
+ cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt;
+ cap->max_send_sge = hr_qp->sq.max_gs;
+
+ /* We don't support inline sends for kernel QPs (yet) */
+ cap->max_inline_data = 0;
+
+ return 0;
+}
+
+static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
+ struct ib_pd *ib_pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, int sqpn,
+ struct hns_roce_qp *hr_qp)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_ib_create_qp ucmd;
+ int qpn = 0;
+ int ret = 0;
+
+ mutex_init(&hr_qp->mutex);
+ spin_lock_init(&hr_qp->sq.lock);
+ spin_lock_init(&hr_qp->rq.lock);
+
+ hr_qp->state = IB_QPS_RESET;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
+ else
+ hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
+
+ ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject,
+ !!init_attr->srq, hr_qp);
+ if (ret) {
+ dev_err(dev, "hns_roce_set_rq_size failed\n");
+ goto err_out;
+ }
+
+ if (ib_pd->uobject) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ dev_err(dev, "ib_copy_from_udata error for create qp\n");
+ ret = -EFAULT;
+ goto err_out;
+ }
+
+ ret = hns_roce_set_user_sq_size(hr_dev, hr_qp, &ucmd);
+ if (ret) {
+ dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
+ goto err_out;
+ }
+
+ hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
+ ucmd.buf_addr, hr_qp->buff_size, 0,
+ 0);
+ if (IS_ERR(hr_qp->umem)) {
+ dev_err(dev, "ib_umem_get error for create qp\n");
+ ret = PTR_ERR(hr_qp->umem);
+ goto err_out;
+ }
+
+ ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem),
+ ilog2((unsigned int)hr_qp->umem->page_size),
+ &hr_qp->mtt);
+ if (ret) {
+ dev_err(dev, "hns_roce_mtt_init error for create qp\n");
+ goto err_buf;
+ }
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt,
+ hr_qp->umem);
+ if (ret) {
+ dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n");
+ goto err_mtt;
+ }
+ } else {
+ if (init_attr->create_flags &
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ dev_err(dev, "init_attr->create_flags error!\n");
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
+ dev_err(dev, "init_attr->create_flags error!\n");
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ /* Set SQ size */
+ ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap,
+ init_attr->qp_type, hr_qp);
+ if (ret) {
+ dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
+ goto err_out;
+ }
+
+ /* QP doorbell register address */
+ hr_qp->sq.db_reg_l = hr_dev->reg_base + ROCEE_DB_SQ_L_0_REG +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+ hr_qp->rq.db_reg_l = hr_dev->reg_base +
+ ROCEE_DB_OTHERS_L_0_REG +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+ /* Allocate QP buf */
+ if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, PAGE_SIZE * 2,
+ &hr_qp->hr_buf)) {
+ dev_err(dev, "hns_roce_buf_alloc error!\n");
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ /* Write MTT */
+ ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages,
+ hr_qp->hr_buf.page_shift, &hr_qp->mtt);
+ if (ret) {
+ dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n");
+ goto err_buf;
+ }
+
+ ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt,
+ &hr_qp->hr_buf);
+ if (ret) {
+ dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n");
+ goto err_mtt;
+ }
+
+ hr_qp->sq.wrid = kmalloc_array(hr_qp->sq.wqe_cnt, sizeof(u64),
+ GFP_KERNEL);
+ hr_qp->rq.wrid = kmalloc_array(hr_qp->rq.wqe_cnt, sizeof(u64),
+ GFP_KERNEL);
+ if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) {
+ ret = -ENOMEM;
+ dev_err(dev, "wrid buf alloc failed!\n");
+ goto err_wrid;
+ }
+ }
+
+ if (sqpn) {
+ qpn = sqpn;
+ } else {
+ /* Get QPN */
+ ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn);
+ if (ret) {
+ dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n");
+ goto err_wrid;
+ }
+ }
+
+ if ((init_attr->qp_type) == IB_QPT_GSI) {
+ ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
+ if (ret) {
+ dev_err(dev, "hns_roce_qp_alloc failed!\n");
+ goto err_qpn;
+ }
+ } else {
+ ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp);
+ if (ret) {
+ dev_err(dev, "hns_roce_qp_alloc failed!\n");
+ goto err_qpn;
+ }
+ }
+
+ if (sqpn)
+ hr_qp->doorbell_qpn = 1;
+ else
+ hr_qp->doorbell_qpn = cpu_to_le32(hr_qp->qpn);
+
+ hr_qp->event = hns_roce_ib_qp_event;
+
+ return 0;
+
+err_qpn:
+ if (!sqpn)
+ hns_roce_release_range_qp(hr_dev, qpn, 1);
+
+err_wrid:
+ kfree(hr_qp->sq.wrid);
+ kfree(hr_qp->rq.wrid);
+
+err_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+
+err_buf:
+ if (ib_pd->uobject)
+ ib_umem_release(hr_qp->umem);
+ else
+ hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+
+err_out:
+ return ret;
+}
+
+struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_sqp *hr_sqp;
+ struct hns_roce_qp *hr_qp;
+ int ret;
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_RC: {
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (!hr_qp) {
+ dev_err(dev, "hr_qp alloc failed!\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
+ hr_qp);
+ if (ret) {
+ dev_err(dev, "Create RC QP failed\n");
+ kfree(hr_qp);
+ return ERR_PTR(ret);
+ }
+
+ hr_qp->ibqp.qp_num = hr_qp->qpn;
+
+ break;
+ }
+ case IB_QPT_GSI: {
+ /* Userspace is not allowed to create special QPs: */
+ if (pd->uobject) {
+ dev_err(dev, "not support usr space GSI\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL);
+ if (!hr_sqp)
+ return ERR_PTR(-ENOMEM);
+
+ hr_qp = &hr_sqp->hr_qp;
+
+ ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
+ hr_dev->caps.sqp_start +
+ hr_dev->caps.num_ports +
+ init_attr->port_num - 1, hr_qp);
+ if (ret) {
+ dev_err(dev, "Create GSI QP failed!\n");
+ kfree(hr_sqp);
+ return ERR_PTR(ret);
+ }
+
+ hr_qp->port = (init_attr->port_num - 1);
+ hr_qp->ibqp.qp_num = hr_dev->caps.sqp_start +
+ hr_dev->caps.num_ports +
+ init_attr->port_num - 1;
+ break;
+ }
+ default:{
+ dev_err(dev, "not support QP type %d\n", init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return &hr_qp->ibqp;
+}
+
+int to_hr_qp_type(int qp_type)
+{
+ int transport_type;
+
+ if (qp_type == IB_QPT_RC)
+ transport_type = SERV_TYPE_RC;
+ else if (qp_type == IB_QPT_UC)
+ transport_type = SERV_TYPE_UC;
+ else if (qp_type == IB_QPT_UD)
+ transport_type = SERV_TYPE_UD;
+ else if (qp_type == IB_QPT_GSI)
+ transport_type = SERV_TYPE_UD;
+ else
+ transport_type = -1;
+
+ return transport_type;
+}
+
+int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ struct device *dev = &hr_dev->pdev->dev;
+ int ret = -EINVAL;
+ int p;
+
+ mutex_lock(&hr_qp->mutex);
+
+ cur_state = attr_mask & IB_QP_CUR_STATE ?
+ attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
+ new_state = attr_mask & IB_QP_STATE ?
+ attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_ETHERNET)) {
+ dev_err(dev, "ib_modify_qp_is_ok failed\n");
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
+ dev_err(dev, "attr port_num invalid.attr->port_num=%d\n",
+ attr->port_num);
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ p = attr_mask & IB_QP_PORT ? attr->port_num : (hr_qp->port + 1);
+ if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
+ dev_dbg(dev, "attr pkey_index invalid.attr->pkey_index=%d\n",
+ attr->pkey_index);
+ goto out;
+ }
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
+ dev_dbg(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
+ attr->max_rd_atomic);
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
+ dev_dbg(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
+ attr->max_dest_rd_atomic);
+ goto out;
+ }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ ret = -EPERM;
+ dev_dbg(dev, "cur_state=%d new_state=%d\n", cur_state,
+ new_state);
+ goto out;
+ }
+
+ ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
+ new_state);
+
+out:
+ mutex_unlock(&hr_qp->mutex);
+
+ return ret;
+}
+
+void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
+{
+ if (send_cq == recv_cq) {
+ spin_lock_irq(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
+ struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
+ __releases(&recv_cq->lock)
+{
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
+
+__be32 send_ieth(struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return cpu_to_le32(wr->ex.imm_data);
+ case IB_WR_SEND_WITH_INV:
+ return cpu_to_le32(wr->ex.invalidate_rkey);
+ default:
+ return 0;
+ }
+}
+
+static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
+{
+
+ return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
+}
+
+void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
+{
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+
+ if ((n < 0) || (n > hr_qp->rq.wqe_cnt)) {
+ dev_err(&hr_dev->pdev->dev, "rq wqe index:%d,rq wqe cnt:%d\r\n",
+ n, hr_qp->rq.wqe_cnt);
+ return NULL;
+ }
+
+ return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
+}
+
+void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
+{
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+
+ if ((n < 0) || (n > hr_qp->sq.wqe_cnt)) {
+ dev_err(&hr_dev->pdev->dev, "sq wqe index:%d,sq wqe cnt:%d\r\n",
+ n, hr_qp->sq.wqe_cnt);
+ return NULL;
+ }
+
+ return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
+}
+
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+ struct ib_cq *ib_cq)
+{
+ struct hns_roce_cq *hr_cq;
+ u32 cur;
+
+ cur = hr_wq->head - hr_wq->tail;
+ if (likely(cur + nreq < hr_wq->max_post))
+ return 0;
+
+ hr_cq = to_hr_cq(ib_cq);
+ spin_lock(&hr_cq->lock);
+ cur = hr_wq->head - hr_wq->tail;
+ spin_unlock(&hr_cq->lock);
+
+ return cur + nreq >= hr_wq->max_post;
+}
+
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/drivers/infiniband/hw/hns/hns_roce_user.h
index 457e77a..73c3f0c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_user.h
+++ b/drivers/infiniband/hw/hns/hns_roce_user.h
@@ -10,6 +10,12 @@
#ifndef _HNS_ROCE_USER_H
#define _HNS_ROCE_USER_H
+struct hns_roce_ib_create_qp {
+ __u64 buf_addr;
+ __u8 log_sq_bb_count;
+ __u8 log_sq_stride;
+};
+
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
};
--
1.9.1
^ permalink raw reply related
* [PATCH v5 18/21] IB/hns: Add CQ operation implemention support
From: Lijun Ou @ 2016-04-23 10:26 UTC (permalink / raw)
To: dledford, sean.hefty, hal.rosenstock, davem, jeffrey.t.kirsher,
jiri, ogerlitz, linuxarm
Cc: linux-rdma, linux-kernel, netdev, gongyangming, xiaokun,
tangchaofei, oulijun, haifeng.wei, yisen.zhuang, yankejian,
lisheng011, charles.chenxin
In-Reply-To: <1461407219-72027-1-git-send-email-oulijun@huawei.com>
This patch was implemention for Completion Queue(CQ) operations.
A CQ can be used to multiplex work completions from multiple work
queues across queue pairs on the same HCA. CQ as the notification
mechanism for Work Request completions.
CQ operations implemention as follows:
1. create CQ. CQ are created through the Channel Interface,
The maximum number of Completion Queue Entries (CQEs) that
may be outstanding on a CQ must be specified when the CQ
is created.
2. destroy CQ. Destroys the specified CQ. Resources allocated
by the Channel Interface to implement the CQ must be
deallocated during the destroy operation.
3. request completion notification. Requests the CQ event handler
be called when the next completion entry of the specified type
is added to the specified CQ.
4. poll CQ. Polls the specified CQ for a Work Completion.
A Work Completion indicates that a Work Request for a Work Queue
associated with the CQ is done.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu(Xavier) <xavier.huwei@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_cq.c | 356 ++++++++++++++++++++++++++++
drivers/infiniband/hw/hns/hns_roce_device.h | 34 ++-
drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 340 ++++++++++++++++++++++++++
drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 117 +++++++++
drivers/infiniband/hw/hns/hns_roce_main.c | 10 +
drivers/infiniband/hw/hns/hns_roce_user.h | 4 +
6 files changed, 860 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index f7baf82..16eaa8d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -11,6 +11,362 @@
#include <linux/log2.h>
#include <linux/slab.h>
#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_icm.h"
+#include "hns_roce_user.h"
+#include "hns_roce_common.h"
+
+static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
+{
+ struct ib_cq *ibcq = &hr_cq->ib_cq;
+
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq,
+ enum hns_roce_event event_type)
+{
+ struct hns_roce_dev *hr_dev;
+ struct ib_event event;
+ struct ib_cq *ibcq;
+
+ ibcq = &hr_cq->ib_cq;
+ hr_dev = to_hr_dev(ibcq->device);
+
+ if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
+ dev_err(&hr_dev->pdev->dev,
+ "hns_roce_ib: Unexpected event type 0x%x on CQ %06x\n",
+ event_type, hr_cq->cqn);
+ return;
+ }
+
+ if (ibcq->event_handler) {
+ event.device = ibcq->device;
+ event.event = IB_EVENT_CQ_ERR;
+ event.element.cq = ibcq;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
+}
+
+static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox, int cq_num)
+{
+ return hns_roce_cmd(dev, mailbox->dma, cq_num, 0,
+ HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A);
+}
+
+static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
+ struct hns_roce_mtt *hr_mtt,
+ struct hns_roce_uar *hr_uar,
+ struct hns_roce_cq *hr_cq, int vector,
+ int collapsed)
+{
+ struct hns_roce_cmd_mailbox *mailbox = NULL;
+ struct hns_roce_cq_table *cq_table = NULL;
+ struct device *dev = &hr_dev->pdev->dev;
+ dma_addr_t dma_handle;
+ u64 *mtts = NULL;
+ int ret = 0;
+
+ cq_table = &hr_dev->cq_table;
+
+ /* Get the physical address of cq buf */
+ mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
+ hr_mtt->first_seg, &dma_handle);
+ if (!mtts) {
+ dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n");
+ return -EINVAL;
+ }
+
+ if (vector >= hr_dev->caps.num_comp_vectors) {
+ dev_err(dev, "CQ alloc.Invalid vector.\n");
+ return -EINVAL;
+ }
+ hr_cq->vector = vector;
+
+ ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
+ if (ret == -1) {
+ dev_err(dev, "CQ alloc.Failed to alloc index.\n");
+ return -ENOMEM;
+ }
+
+ /* Get CQC memory icm table */
+ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
+ if (ret) {
+ dev_err(dev, "CQ alloc.Failed to get context mem.\n");
+ goto err_out;
+ }
+
+ /* The cq insert radix tree */
+ spin_lock_irq(&cq_table->lock);
+ /* Radix_tree: The associated pointer and long integer key value like */
+ ret = radix_tree_insert(&cq_table->tree, hr_cq->cqn, hr_cq);
+ spin_unlock_irq(&cq_table->lock);
+ if (ret) {
+ dev_err(dev, "CQ alloc.Failed to radix_tree_insert.\n");
+ goto err_put;
+ }
+
+ /* Applicate mailbox memory */
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ dev_err(dev, "CQ alloc.Failed to alloc mailbox.\n");
+ ret = PTR_ERR(mailbox);
+ goto err_radix;
+ }
+
+ hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle,
+ nent, vector);
+
+ /* CQ instructions which sw send to hw be transimited via mailbox */
+ ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret) {
+ dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n");
+ goto err_radix;
+ }
+
+ hr_cq->cons_index = 0;
+ hr_cq->uar = hr_uar;
+
+ return 0;
+
+err_radix:
+ spin_lock_irq(&cq_table->lock);
+ radix_tree_delete(&cq_table->tree, hr_cq->cqn);
+ spin_unlock_irq(&cq_table->lock);
+
+err_put:
+ hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
+
+err_out:
+ hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+ return ret;
+}
+
+static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox, int cq_num)
+{
+ return hns_roce_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
+ mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
+ HNS_ROCE_CMD_TIME_CLASS_A);
+}
+
+void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct device *dev = &hr_dev->pdev->dev;
+ int ret;
+
+ ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn);
+ if (ret)
+ dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", ret,
+ hr_cq->cqn);
+
+ /* Waiting interrupt process procedure carried out */
+ synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
+
+ spin_lock_irq(&cq_table->lock);
+ radix_tree_delete(&cq_table->tree, hr_cq->cqn);
+ spin_unlock_irq(&cq_table->lock);
+
+ hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
+ hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+}
+
+static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
+ struct ib_ucontext *context,
+ struct hns_roce_cq_buf *buf,
+ struct ib_umem **umem, u64 buf_addr, int cqe)
+{
+ int ret;
+
+ /* Get and mapping user space */
+ *umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(*umem))
+ return PTR_ERR(*umem);
+
+ ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
+ ilog2((unsigned int)(*umem)->page_size),
+ &buf->hr_mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem);
+ if (ret)
+ goto err_mtt;
+
+ return 0;
+
+err_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+
+err_buf:
+ ib_umem_release(*umem);
+ return ret;
+}
+
+static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq_buf *buf, u32 nent)
+{
+ int ret;
+
+ ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
+ PAGE_SIZE * 2, &buf->hr_buf);
+ if (ret)
+ goto out;
+
+ ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages,
+ buf->hr_buf.page_shift, &buf->hr_mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf);
+ if (ret)
+ goto err_mtt;
+
+ return 0;
+
+err_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+
+err_buf:
+ hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz,
+ &buf->hr_buf);
+out:
+ return ret;
+}
+
+static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq_buf *buf, int cqe)
+{
+ hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz,
+ &buf->hr_buf);
+}
+
+struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_ib_create_cq ucmd;
+ struct hns_roce_cq *hr_cq = NULL;
+ struct hns_roce_uar *uar = NULL;
+ int vector = attr->comp_vector;
+ int cq_entries = attr->cqe;
+ int ret = 0;
+
+ if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
+ dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
+ cq_entries, hr_dev->caps.max_cqes);
+ return ERR_PTR(-EINVAL);
+ }
+
+ hr_cq = kmalloc(sizeof(*hr_cq), GFP_KERNEL);
+ if (!hr_cq)
+ return ERR_PTR(-ENOMEM);
+
+ /* In v1 engine, parameter verification */
+ if (cq_entries < HNS_ROCE_MIN_CQE_NUM)
+ cq_entries = HNS_ROCE_MIN_CQE_NUM;
+
+ cq_entries = roundup_pow_of_two((unsigned int)cq_entries);
+ hr_cq->ib_cq.cqe = cq_entries - 1;
+ mutex_init(&hr_cq->resize_mutex);
+ spin_lock_init(&hr_cq->lock);
+ hr_cq->hr_resize_buf = NULL;
+ hr_cq->resize_umem = NULL;
+
+ if (context) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ dev_err(dev, "Failed to copy_from_udata.\n");
+ ret = -EFAULT;
+ goto err_cq;
+ }
+
+ /* Get user space address, write it into mtt table */
+ ret = hns_roce_ib_get_cq_umem(hr_dev, context, &hr_cq->hr_buf,
+ &hr_cq->umem, ucmd.buf_addr,
+ cq_entries);
+ if (ret) {
+ dev_err(dev, "Failed to get_cq_umem.\n");
+ goto err_cq;
+ }
+
+ /* Get user space parameters */
+ uar = &to_hr_ucontext(context)->uar;
+ } else {
+ /* Init mmt table and write buff address to mtt table */
+ ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf,
+ cq_entries);
+ if (ret) {
+ dev_err(dev, "Failed to alloc_cq_buf.\n");
+ goto err_cq;
+ }
+
+ uar = &hr_dev->priv_uar;
+ hr_cq->cq_db_l = hr_dev->reg_base + ROCEE_DB_OTHERS_L_0_REG +
+ 0x1000 * uar->index;
+ }
+
+ /* Allocate cq index, fill cq_context */
+ ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt,
+ uar, hr_cq, vector, 0);
+ if (ret) {
+ dev_err(dev, "Creat CQ .Failed to cq_alloc.\n");
+ goto err_mtt;
+ }
+
+ /* Get created cq handler and carry out event */
+ hr_cq->comp = hns_roce_ib_cq_comp;
+ hr_cq->event = hns_roce_ib_cq_event;
+ hr_cq->cq_depth = cq_entries;
+
+ if (context) {
+ if (ib_copy_to_udata(udata, &(hr_cq->cqn), sizeof(__u32))) {
+ ret = -EFAULT;
+ goto err_mtt;
+ }
+ }
+
+ return &hr_cq->ib_cq;
+
+err_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ if (context)
+ ib_umem_release(hr_cq->umem);
+ else
+ hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
+ hr_cq->ib_cq.cqe);
+
+err_cq:
+ kfree(hr_cq);
+ return ERR_PTR(ret);
+}
+
+int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+
+ hns_roce_free_cq(hr_dev, hr_cq);
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+
+ if (ib_cq->uobject)
+ ib_umem_release(hr_cq->umem);
+ else
+ /* Free the buff of stored cq */
+ hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe);
+
+ kfree(hr_cq);
+
+ return 0;
+}
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
{
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index e204a7f..21698c5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -31,6 +31,7 @@
#define HNS_ROCE_BA_SIZE (32 * 4096)
/* Hardware specification only for v1 engine */
+#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_MIN_WQE_NUM 0x20
#define HNS_ROCE_MAX_IRQ_NUM 34
@@ -124,6 +125,12 @@ enum {
HNS_ROCE_CMD_SUCCESS = 1,
};
+enum {
+ /* RQ&SRQ related operations */
+ HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
+ HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07,
+};
+
#define HNS_ROCE_PORT_DOWN 0
#define HNS_ROCE_PORT_UP 1
@@ -229,20 +236,33 @@ struct hns_roce_buf {
struct hns_roce_cq_buf {
struct hns_roce_buf hr_buf;
+ struct hns_roce_mtt hr_mtt;
+};
+
+struct hns_roce_cq_resize {
+ struct hns_roce_cq_buf hr_buf;
+ int cqe;
};
struct hns_roce_cq {
struct ib_cq ib_cq;
struct hns_roce_cq_buf hr_buf;
/* pointer to store information after resize*/
+ struct hns_roce_cq_resize *hr_resize_buf;
spinlock_t lock;
+ struct mutex resize_mutex;
+ struct ib_umem *umem;
+ struct ib_umem *resize_umem;
void (*comp)(struct hns_roce_cq *);
void (*event)(struct hns_roce_cq *, enum hns_roce_event);
+ struct hns_roce_uar *uar;
u32 cq_depth;
u32 cons_index;
void __iomem *cq_db_l;
+ void __iomem *tptr_addr;
u32 cqn;
+ u32 vector;
atomic_t refcount;
struct completion free;
};
@@ -451,6 +471,9 @@ struct hns_roce_hw {
void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
enum ib_mtu mtu);
+ void (*write_cqc)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
+ dma_addr_t dma_handle, int nent, u32 vector);
int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
@@ -461,7 +484,8 @@ struct hns_roce_hw {
struct ib_send_wr **bad_wr);
int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
-
+ int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+ int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
void *priv;
};
@@ -485,6 +509,7 @@ struct hns_roce_dev {
u32 vendor_id;
u32 vendor_part_id;
u32 hw_rev;
+ void __iomem *priv_addr;
struct hns_roce_cmdq cmd;
struct hns_roce_bitmap pd_bitmap;
@@ -645,6 +670,13 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
__be32 send_ieth(struct ib_send_wr *wr);
int to_hr_qp_type(int qp_type);
+struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
+
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 3bbf570..2006bfd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1043,6 +1043,11 @@ static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
!!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL;
}
+static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
+{
+ return get_sw_cqe(hr_cq, hr_cq->cons_index);
+}
+
void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index,
spinlock_t *doorbell_lock)
@@ -1118,6 +1123,338 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
spin_unlock_irq(&hr_cq->lock);
}
+void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
+ dma_addr_t dma_handle, int nent, u32 vector)
+{
+ struct hns_roce_cq_context *cq_context = NULL;
+ void __iomem *tptr_addr;
+
+ cq_context = mb_buf;
+ memset(cq_context, 0, sizeof(*cq_context));
+
+ tptr_addr = 0;
+ hr_dev->priv_addr = tptr_addr;
+ hr_cq->tptr_addr = tptr_addr;
+
+ /* Register cq_context members */
+ roce_set_field(cq_context->cqc_byte_4,
+ CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M,
+ CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID);
+ roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M,
+ CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn);
+ cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4);
+
+ cq_context->cq_bt_l = (u32)dma_handle;
+ cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l);
+
+ roce_set_field(cq_context->cqc_byte_12,
+ CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M,
+ CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S,
+ ((u64)dma_handle >> ADDR_SHIFT_32));
+ roce_set_field(cq_context->cqc_byte_12,
+ CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M,
+ CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S,
+ ilog2((unsigned int)nent));
+ roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
+ CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
+ cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12);
+
+ cq_context->cur_cqe_ba0_l = (u32)(mtts[0]);
+ cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l);
+
+ roce_set_field(cq_context->cqc_byte_20,
+ CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M,
+ CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S,
+ cpu_to_le32((mtts[0]) >> ADDR_SHIFT_32));
+ /* Dedicated hardware, directly set 0 */
+ roce_set_field(cq_context->cqc_byte_20,
+ CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M,
+ CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S, 0);
+ roce_set_field(cq_context->cqc_byte_20,
+ CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
+ CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
+ (u64)tptr_addr >> ADDR_SHIFT_44);
+ cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
+
+ cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> ADDR_SHIFT_12);
+
+ roce_set_field(cq_context->cqc_byte_32,
+ CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
+ CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S, 0);
+ roce_set_bit(cq_context->cqc_byte_32,
+ CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S, 0);
+ roce_set_bit(cq_context->cqc_byte_32,
+ CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S, 0);
+ roce_set_bit(cq_context->cqc_byte_32,
+ CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S, 0);
+ roce_set_bit(cq_context->cqc_byte_32,
+ CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S,
+ 0);
+ /*The initial value of cq's ci is 0 */
+ roce_set_field(cq_context->cqc_byte_32,
+ CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
+ CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
+ cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32);
+}
+
+int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ u32 notification_flag;
+ u32 doorbell[2];
+ int ret = 0;
+
+ notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
+ IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
+ /*
+ * flags = 0; Notification Flag = 1, next
+ * flags = 1; Notification Flag = 0, solocited
+ */
+ doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1);
+ roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
+ roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
+ ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
+ roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
+ ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 1);
+ roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
+ ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S,
+ hr_cq->cqn | notification_flag);
+
+ hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
+
+ return ret;
+}
+
+static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
+ struct hns_roce_qp **cur_qp, struct ib_wc *wc)
+{
+ int qpn;
+ int is_send;
+ u16 wqe_ctr;
+ u32 status;
+ u32 opcode;
+ struct hns_roce_cqe *cqe;
+ struct hns_roce_qp *hr_qp;
+ struct hns_roce_wq *wq;
+ struct hns_roce_wqe_ctrl_seg *sq_wqe;
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct device *dev = &hr_dev->pdev->dev;
+
+ /* Find cqe according consumer index */
+ cqe = next_cqe_sw(hr_cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ ++hr_cq->cons_index;
+ /* Memory barrier */
+ rmb();
+ /* 0->SQ, 1->RQ */
+ is_send = !(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S));
+
+ /* Local_qpn in UD cqe is always 1, so it needs to compute new qpn */
+ if (roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
+ CQE_BYTE_16_LOCAL_QPN_S) <= 1) {
+ qpn = roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_PORT_NUM_M,
+ CQE_BYTE_20_PORT_NUM_S) +
+ roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
+ CQE_BYTE_16_LOCAL_QPN_S) *
+ HNS_ROCE_MAX_PORTS;
+ } else {
+ qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
+ CQE_BYTE_16_LOCAL_QPN_S);
+ }
+
+ if (!*cur_qp || (qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->qpn) {
+ hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
+ if (unlikely(!hr_qp)) {
+ dev_err(dev, "CQ %06x with entry for unknown QPN %06x\n",
+ hr_cq->cqn, (qpn & HNS_ROCE_CQE_QPN_MASK));
+ return -EINVAL;
+ }
+
+ *cur_qp = hr_qp;
+ }
+
+ wc->qp = &(*cur_qp)->ibqp;
+ wc->vendor_err = 0;
+
+ status = roce_get_field(cqe->cqe_byte_4,
+ CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
+ CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) &
+ HNS_ROCE_CQE_STATUS_MASK;
+ switch (status) {
+ case HNS_ROCE_CQE_SUCCESS:
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+ wc->status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR:
+ wc->status = IB_WC_BAD_RESP_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+ wc->status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+ wc->status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR:
+ wc->status = IB_WC_REM_OP_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+ wc->status = IB_WC_RETRY_EXC_ERR;
+ break;
+ case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+ wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+ break;
+ default:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+
+ /* CQE status error, directly return */
+ if (wc->status != IB_WC_SUCCESS)
+ return 0;
+
+ if (is_send) {
+ /* SQ conrespond to CQE */
+ sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4,
+ CQE_BYTE_4_WQE_INDEX_M,
+ CQE_BYTE_4_WQE_INDEX_S));
+ switch (sq_wqe->flag & HNS_ROCE_WQE_OPCODE_MASK) {
+ case HNS_ROCE_WQE_OPCODE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case HNS_ROCE_WQE_OPCODE_RDMA_READ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+ break;
+ case HNS_ROCE_WQE_OPCODE_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case HNS_ROCE_WQE_OPCODE_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ case HNS_ROCE_WQE_OPCODE_UD_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ default:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+ wc->wc_flags = (sq_wqe->flag & HNS_ROCE_WQE_IMM ?
+ IB_WC_WITH_IMM : 0);
+
+ wq = &(*cur_qp)->sq;
+ if ((*cur_qp)->sq_signal_bits) {
+ /*
+ * If sg_signal_bit is 1,
+ * firstly tail pointer updated to wqe
+ * which current cqe correspond to
+ */
+ wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4,
+ CQE_BYTE_4_WQE_INDEX_M,
+ CQE_BYTE_4_WQE_INDEX_S);
+ wq->tail += (wqe_ctr - (u16)wq->tail) &
+ (wq->wqe_cnt - 1);
+ }
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ } else {
+ /* RQ conrespond to CQE */
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+ opcode = roce_get_field(cqe->cqe_byte_4,
+ CQE_BYTE_4_OPERATION_TYPE_M,
+ CQE_BYTE_4_OPERATION_TYPE_S) &
+ HNS_ROCE_CQE_OPCODE_MASK;
+ switch (opcode) {
+ case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = le32_to_cpu(cqe->immediate_data);
+ break;
+ case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
+ if (roce_get_bit(cqe->cqe_byte_4,
+ CQE_BYTE_4_IMM_INDICATOR_S)) {
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = le32_to_cpu(
+ cqe->immediate_data);
+ } else {
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+ }
+ break;
+ default:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+
+ /* Update tail pointer, record wr_id */
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ wc->sl = (u8)roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_SL_M,
+ CQE_BYTE_20_SL_S);
+ wc->src_qp = (u8)roce_get_field(cqe->cqe_byte_20,
+ CQE_BYTE_20_REMOTE_QPN_M,
+ CQE_BYTE_20_REMOTE_QPN_S);
+ wc->wc_flags |= (roce_get_bit(cqe->cqe_byte_20,
+ CQE_BYTE_20_GRH_PRESENT_S) ?
+ IB_WC_GRH : 0);
+ wc->pkey_index = (u16)roce_get_field(cqe->cqe_byte_28,
+ CQE_BYTE_28_P_KEY_IDX_M,
+ CQE_BYTE_28_P_KEY_IDX_S);
+ }
+
+ return 0;
+}
+
+int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ struct hns_roce_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+ int ret = 0;
+
+ spin_lock_irqsave(&hr_cq->lock, flags);
+
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ ret = hns_roce_v1_poll_one(hr_cq, &cur_qp, wc + npolled);
+ if (ret)
+ break;
+ }
+
+ if (npolled) {
+ hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index,
+ &to_hr_dev(ibcq->device)->cq_db_lock);
+ }
+
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+
+ if (ret == 0 || ret == -EAGAIN)
+ return npolled;
+ else
+ return ret;
+}
+
static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt,
enum hns_roce_qp_state cur_state,
@@ -2307,9 +2644,12 @@ struct hns_roce_hw hns_roce_hw_v1 = {
.set_gid = hns_roce_v1_set_gid,
.set_mac = hns_roce_v1_set_mac,
.set_mtu = hns_roce_v1_set_mtu,
+ .write_cqc = hns_roce_v1_write_cqc,
.modify_qp = hns_roce_v1_modify_qp,
.query_qp = hns_roce_v1_query_qp,
.destroy_qp = hns_roce_v1_destroy_qp,
.post_send = hns_roce_v1_post_send,
.post_recv = hns_roce_v1_post_recv,
+ .req_notify_cq = hns_roce_v1_req_notify_cq,
+ .poll_cq = hns_roce_v1_poll_cq,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 917985c..ba81540 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -10,6 +10,8 @@
#ifndef _HNS_ROCE_HW_V1_H
#define _HNS_ROCE_HW_V1_H
+#define CQ_STATE_VALID 2
+
#define HNS_ROCE_V1_MAX_PD_NUM 0x8000
#define HNS_ROCE_V1_MAX_CQ_NUM 0x10000
#define HNS_ROCE_V1_MAX_CQE_NUM 0x8000
@@ -85,6 +87,22 @@
#define HNS_ROCE_ODB_EXTEND_MODE 1
#define HNS_ROCE_CQE_QPN_MASK 0x3ffff
+#define HNS_ROCE_CQE_STATUS_MASK 0x1f
+#define HNS_ROCE_CQE_OPCODE_MASK 0xf
+
+#define HNS_ROCE_CQE_SUCCESS 0x00
+#define HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01
+#define HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR 0x02
+#define HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR 0x03
+#define HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR 0x04
+#define HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR 0x05
+#define HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR 0x06
+#define HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR 0x07
+#define HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR 0x08
+#define HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR 0x09
+#define HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR 0x0a
+#define HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR 0x0b
+#define HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR 0x0c
#define QP1C_CFGN_OFFSET 0x28
#define PHY_PORT_OFFSET 0x8
@@ -95,17 +113,114 @@
#define QKEY_VAL 0x80010000
#define SDB_INV_CNT_OFFSET 8
+struct hns_roce_cq_context {
+ u32 cqc_byte_4;
+ u32 cq_bt_l;
+ u32 cqc_byte_12;
+ u32 cur_cqe_ba0_l;
+ u32 cqc_byte_20;
+ u32 cqe_tptr_addr_l;
+ u32 cur_cqe_ba1_l;
+ u32 cqc_byte_32;
+};
+
+#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S 0
+#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M \
+ (((1UL << 2) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S)
+
+#define CQ_CONTEXT_CQC_BYTE_4_CQN_S 16
+#define CQ_CONTEXT_CQC_BYTE_4_CQN_M \
+ (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQN_S)
+
+#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S 0
+#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M \
+ (((1UL << 17) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S)
+
+#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S 20
+#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M \
+ (((1UL << 4) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S)
+
+#define CQ_CONTEXT_CQC_BYTE_12_CEQN_S 24
+#define CQ_CONTEXT_CQC_BYTE_12_CEQN_M \
+ (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_12_CEQN_S)
+
+#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S 0
+#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M \
+ (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S)
+
+#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S 16
+#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M \
+ (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S)
+
+#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S 8
+#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M \
+ (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S)
+
+#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S 0
+#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M \
+ (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S)
+
+#define CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S 9
+
+#define CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S 8
+#define CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S 14
+#define CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S 15
+
+#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S 16
+#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M \
+ (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S)
+
struct hns_roce_cqe {
u32 cqe_byte_4;
+ union {
+ u32 r_key;
+ u32 immediate_data;
+ };
+ u32 byte_cnt;
u32 cqe_byte_16;
+ u32 cqe_byte_20;
+ u32 s_mac_l;
+ u32 cqe_byte_28;
+ u32 reserved;
};
#define CQE_BYTE_4_OWNER_S 7
#define CQE_BYTE_4_SQ_RQ_FLAG_S 14
+#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8
+#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \
+ (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S)
+
+#define CQE_BYTE_4_WQE_INDEX_S 16
+#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S)
+
+#define CQE_BYTE_4_OPERATION_TYPE_S 0
+#define CQE_BYTE_4_OPERATION_TYPE_M \
+ (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S)
+
+#define CQE_BYTE_4_IMM_INDICATOR_S 15
+
#define CQE_BYTE_16_LOCAL_QPN_S 0
#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S)
+#define CQE_BYTE_20_PORT_NUM_S 26
+#define CQE_BYTE_20_PORT_NUM_M (((1UL << 3) - 1) << CQE_BYTE_20_PORT_NUM_S)
+
+#define CQE_BYTE_20_SL_S 24
+#define CQE_BYTE_20_SL_M (((1UL << 2) - 1) << CQE_BYTE_20_SL_S)
+
+#define CQE_BYTE_20_REMOTE_QPN_S 0
+#define CQE_BYTE_20_REMOTE_QPN_M \
+ (((1UL << 24) - 1) << CQE_BYTE_20_REMOTE_QPN_S)
+
+#define CQE_BYTE_20_GRH_PRESENT_S 29
+
+#define CQE_BYTE_28_P_KEY_IDX_S 16
+#define CQE_BYTE_28_P_KEY_IDX_M (((1UL << 16) - 1) << CQE_BYTE_28_P_KEY_IDX_S)
+
+#define CQ_DB_REQ_NOT_SOL 0
+#define CQ_DB_REQ_NOT (1 << 16)
+
struct hns_roce_wqe_ctrl_seg {
__be32 sgl_pa_h;
__be32 flag;
@@ -314,6 +429,8 @@ struct hns_roce_sqp_context {
#define HNS_ROCE_WQE_OPCODE_SEND (0<<16)
#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16)
#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16)
+#define HNS_ROCE_WQE_OPCODE_LOCAL_INV (4<<16)
+#define HNS_ROCE_WQE_OPCODE_UD_SEND (7<<16)
#define HNS_ROCE_WQE_OPCODE_MASK (15<<16)
struct hns_roce_qp_context {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index d2f8da5..ad838cf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -588,6 +588,9 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
(1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ULL << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ULL << IB_USER_VERBS_CMD_CREATE_QP) |
(1ULL << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
@@ -622,6 +625,12 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->post_send = hr_dev->hw->post_send;
ib_dev->post_recv = hr_dev->hw->post_recv;
+ /* CQ */
+ ib_dev->create_cq = hns_roce_ib_create_cq;
+ ib_dev->destroy_cq = hns_roce_ib_destroy_cq;
+ ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq;
+ ib_dev->poll_cq = hr_dev->hw->poll_cq;
+
ret = ib_register_device(ib_dev, NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
@@ -914,6 +923,7 @@ static int hns_roce_probe(struct platform_device *pdev)
ret = -EIO;
goto error_failed_get_cfg;
}
+
ret = hns_roce_get_cfg(hr_dev);
if (ret) {
dev_err(dev, "Get Configuration failed!\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/drivers/infiniband/hw/hns/hns_roce_user.h
index 73c3f0c..8222b93 100644
--- a/drivers/infiniband/hw/hns/hns_roce_user.h
+++ b/drivers/infiniband/hw/hns/hns_roce_user.h
@@ -10,6 +10,10 @@
#ifndef _HNS_ROCE_USER_H
#define _HNS_ROCE_USER_H
+struct hns_roce_ib_create_cq {
+ __u64 buf_addr;
+};
+
struct hns_roce_ib_create_qp {
__u64 buf_addr;
__u8 log_sq_bb_count;
--
1.9.1
^ permalink raw reply related
* [PATCH v5 19/21] IB/hns: Add memory region operation support
From: Lijun Ou @ 2016-04-23 10:26 UTC (permalink / raw)
To: dledford, sean.hefty, hal.rosenstock, davem, jeffrey.t.kirsher,
jiri, ogerlitz, linuxarm
Cc: linux-rdma, linux-kernel, netdev, gongyangming, xiaokun,
tangchaofei, oulijun, haifeng.wei, yisen.zhuang, yankejian,
lisheng011, charles.chenxin
In-Reply-To: <1461407219-72027-1-git-send-email-oulijun@huawei.com>
This patch was mainly for implemention of memory region.
Memory Registration provides mechanisms that allow Consumers
to describe a set of virtually contiguous memory locations or
a set of physically contiguous memory locations.
MR operations includes as follows:
1. get dma MR in kernel mode
2. get MR in user mode
3. deregister MR
In addition that, the locations of some functions was adjusted
in some files.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu(Xavier) <xavier.huwei@huawei.com>
---
drivers/infiniband/hw/hns/hns_roce_cmd.h | 9 +
drivers/infiniband/hw/hns/hns_roce_device.h | 46 ++++-
drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 156 +++++++++++++++++
drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 103 ++++++++++++
drivers/infiniband/hw/hns/hns_roce_icm.c | 2 +-
drivers/infiniband/hw/hns/hns_roce_icm.h | 1 +
drivers/infiniband/hw/hns/hns_roce_main.c | 7 +
drivers/infiniband/hw/hns/hns_roce_mr.c | 252 ++++++++++++++++++++++++++++
drivers/infiniband/hw/hns/hns_roce_qp.c | 1 +
9 files changed, 575 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 9583546..4de4cfc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -13,6 +13,14 @@
#include <linux/dma-mapping.h>
enum {
+ /* TPT commands */
+ HNS_ROCE_CMD_SW2HW_MPT = 0xd,
+ HNS_ROCE_CMD_HW2SW_MPT = 0xf,
+
+ /* CQ commands */
+ HNS_ROCE_CMD_SW2HW_CQ = 0x16,
+ HNS_ROCE_CMD_HW2SW_CQ = 0x17,
+
/* QP/EE commands */
HNS_ROCE_CMD_RST2INIT_QP = 0x19,
HNS_ROCE_CMD_INIT2RTR_QP = 0x1a,
@@ -28,6 +36,7 @@ enum {
enum {
HNS_ROCE_CMD_TIME_CLASS_A = 10000,
+ HNS_ROCE_CMD_TIME_CLASS_B = 10000,
HNS_ROCE_CMD_TIME_CLASS_C = 10000,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 21698c5..38d1099 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -34,8 +34,11 @@
#define HNS_ROCE_MIN_CQE_NUM 0x40
#define HNS_ROCE_MIN_WQE_NUM 0x20
+/* Hardware specification only for v1 engine */
+#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
+#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
+
#define HNS_ROCE_MAX_IRQ_NUM 34
-#define HNS_ROCE_MAX_PORTS 6
#define HNS_ROCE_COMP_VEC_NUM 32
@@ -51,13 +54,25 @@
#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
+#define MR_TYPE_MR 0x00
+#define MR_TYPE_DMA 0x03
+
#define PKEY_ID 0xffff
#define NODE_DESC_SIZE 64
+
+#define SERV_TYPE_RC 0
+#define SERV_TYPE_RD 1
+#define SERV_TYPE_UC 2
+#define SERV_TYPE_UD 3
+
#define ADDR_SHIFT_12 12
#define ADDR_SHIFT_32 32
#define ADDR_SHIFT_44 44
+#define PAGES_SHIFT_8 8
#define PAGES_SHIFT_16 16
+#define PAGES_SHIFT_24 24
+#define PAGES_SHIFT_32 32
enum hns_roce_qp_state {
HNS_ROCE_QP_STATE_RST = 0,
@@ -201,6 +216,23 @@ struct hns_roce_mtt {
int page_shift;
};
+/* Only support 4K page size for mr register */
+#define MR_SIZE_4K 0
+
+struct hns_roce_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ u64 iova; /* MR's virtual orignal addr */
+ u64 size; /* Address range of MR */
+ u32 key; /* Key of MR */
+ u32 pd; /* PD num of MR */
+ u32 access;/* Access permission of MR */
+ int enabled; /* MR's active status */
+ int type; /* MR's register type */
+ u64 *pbl_buf;/* MR's PBL space */
+ dma_addr_t pbl_dma_addr; /* MR's PBL space PA */
+};
+
struct hns_roce_mr_table {
struct hns_roce_bitmap mtpt_bitmap;
struct hns_roce_buddy mtt_buddy;
@@ -471,6 +503,7 @@ struct hns_roce_hw {
void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
enum ib_mtu mtu);
+ int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr, int mtpt_idx);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
dma_addr_t dma_handle, int nent, u32 vector);
@@ -545,6 +578,11 @@ static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
return container_of(ibah, struct hns_roce_ah, ibah);
}
+static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct hns_roce_mr, ibmr);
+}
+
static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct hns_roce_qp, ibqp);
@@ -641,6 +679,12 @@ int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, u32 *pdn);
void hns_roce_pd_free(struct hns_roce_dev *hr_dev, u32 pdn);
int hns_roce_dealloc_pd(struct ib_pd *pd);
+struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int hns_roce_dereg_mr(struct ib_mr *ibmr);
+
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
struct hns_roce_buf *buf);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 2006bfd..f3604f3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1028,6 +1028,158 @@ void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
phy_port * PHY_PORT_OFFSET);
}
+int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, int mtpt_idx)
+{
+ struct hns_roce_v1_mpt_entry *mpt_entry;
+ struct scatterlist *sg;
+ u64 *pages;
+ int entry;
+ int i;
+
+ /* MPT filled into mailbox buf */
+ mpt_entry = (struct hns_roce_v1_mpt_entry *)mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_STATE_M,
+ MPT_BYTE_4_KEY_STATE_S, KEY_VALID);
+ roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_M,
+ MPT_BYTE_4_KEY_S, mr->key);
+ roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_PAGE_SIZE_M,
+ MPT_BYTE_4_PAGE_SIZE_S, MR_SIZE_4K);
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_TYPE_S, 0);
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_BIND_ENABLE_S,
+ (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_OWN_S, 0);
+ roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_MEMORY_LOCATION_TYPE_M,
+ MPT_BYTE_4_MEMORY_LOCATION_TYPE_S, mr->type);
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_ATOMIC_S, 0);
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_LOCAL_WRITE_S,
+ (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_WRITE_S,
+ (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_READ_S,
+ (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_INVAL_ENABLE_S,
+ 0);
+ roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_ADDRESS_TYPE_S, 0);
+
+ roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
+ MPT_BYTE_12_PBL_ADDR_H_S, 0);
+ roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M,
+ MPT_BYTE_12_MW_BIND_COUNTER_S, 0);
+
+ mpt_entry->virt_addr_l = (u32)mr->iova;
+ mpt_entry->virt_addr_h = (u32)(mr->iova >> ADDR_SHIFT_32);
+ mpt_entry->length = (u32)mr->size;
+
+ roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M,
+ MPT_BYTE_28_PD_S, mr->pd);
+ roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_L_KEY_IDX_L_M,
+ MPT_BYTE_28_L_KEY_IDX_L_S, mtpt_idx);
+ roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M,
+ MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT);
+
+ /* DMA momery regsiter */
+ if (mr->type == MR_TYPE_DMA)
+ return 0;
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+ pages[i] = ((u64)sg_dma_address(sg)) >> ADDR_SHIFT_12;
+
+ /* Directly record to MTPT table firstly 7 entry */
+ if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM)
+ break;
+ i++;
+ }
+
+ /* Register user mr */
+ for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) {
+ switch (i) {
+ case 0:
+ mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i]));
+ roce_set_field(mpt_entry->mpt_byte_36,
+ MPT_BYTE_36_PA0_H_M,
+ MPT_BYTE_36_PA0_H_S,
+ cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
+ break;
+ case 1:
+ roce_set_field(mpt_entry->mpt_byte_36,
+ MPT_BYTE_36_PA1_L_M,
+ MPT_BYTE_36_PA1_L_S,
+ cpu_to_le32((u32)(pages[i])));
+ roce_set_field(mpt_entry->mpt_byte_40,
+ MPT_BYTE_40_PA1_H_M,
+ MPT_BYTE_40_PA1_H_S,
+ cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
+ break;
+ case 2:
+ roce_set_field(mpt_entry->mpt_byte_40,
+ MPT_BYTE_40_PA2_L_M,
+ MPT_BYTE_40_PA2_L_S,
+ cpu_to_le32((u32)(pages[i])));
+ roce_set_field(mpt_entry->mpt_byte_44,
+ MPT_BYTE_44_PA2_H_M,
+ MPT_BYTE_44_PA2_H_S,
+ cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
+ break;
+ case 3:
+ roce_set_field(mpt_entry->mpt_byte_44,
+ MPT_BYTE_44_PA3_L_M,
+ MPT_BYTE_44_PA3_L_S,
+ cpu_to_le32((u32)(pages[i])));
+ roce_set_field(mpt_entry->mpt_byte_48,
+ MPT_BYTE_48_PA3_H_M,
+ MPT_BYTE_48_PA3_H_S,
+ cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8)));
+ break;
+ case 4:
+ mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i]));
+ roce_set_field(mpt_entry->mpt_byte_56,
+ MPT_BYTE_56_PA4_H_M,
+ MPT_BYTE_56_PA4_H_S,
+ cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
+ break;
+ case 5:
+ roce_set_field(mpt_entry->mpt_byte_56,
+ MPT_BYTE_56_PA5_L_M,
+ MPT_BYTE_56_PA5_L_S,
+ cpu_to_le32((u32)(pages[i])));
+ roce_set_field(mpt_entry->mpt_byte_60,
+ MPT_BYTE_60_PA5_H_M,
+ MPT_BYTE_60_PA5_H_S,
+ cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
+ break;
+ case 6:
+ roce_set_field(mpt_entry->mpt_byte_60,
+ MPT_BYTE_60_PA6_L_M,
+ MPT_BYTE_60_PA6_L_S,
+ cpu_to_le32((u32)(pages[i])));
+ roce_set_field(mpt_entry->mpt_byte_64,
+ MPT_BYTE_64_PA6_H_M,
+ MPT_BYTE_64_PA6_H_S,
+ cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
+ break;
+ default:
+ break;
+ }
+ }
+
+ free_page((unsigned long) pages);
+
+ mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr);
+
+ roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
+ MPT_BYTE_12_PBL_ADDR_H_S,
+ ((u32)(mr->pbl_dma_addr >> ADDR_SHIFT_32)));
+
+ return 0;
+}
+
static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
{
return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
@@ -2636,6 +2788,8 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
return 0;
}
+struct hns_roce_v1_priv hr_v1_priv;
+
struct hns_roce_hw hns_roce_hw_v1 = {
.reset = hns_roce_v1_reset,
.hw_profile = hns_roce_v1_profile,
@@ -2644,6 +2798,7 @@ struct hns_roce_hw hns_roce_hw_v1 = {
.set_gid = hns_roce_v1_set_gid,
.set_mac = hns_roce_v1_set_mac,
.set_mtu = hns_roce_v1_set_mtu,
+ .write_mtpt = hns_roce_v1_write_mtpt,
.write_cqc = hns_roce_v1_write_cqc,
.modify_qp = hns_roce_v1_modify_qp,
.query_qp = hns_roce_v1_query_qp,
@@ -2652,4 +2807,5 @@ struct hns_roce_hw hns_roce_hw_v1 = {
.post_recv = hns_roce_v1_post_recv,
.req_notify_cq = hns_roce_v1_req_notify_cq,
.poll_cq = hns_roce_v1_poll_cq,
+ .priv = &hr_v1_priv,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index ba81540..405c629 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -86,6 +86,8 @@
#define HNS_ROCE_ODB_EXTEND_MODE 1
+#define KEY_VALID 0x02
+
#define HNS_ROCE_CQE_QPN_MASK 0x3ffff
#define HNS_ROCE_CQE_STATUS_MASK 0x1f
#define HNS_ROCE_CQE_OPCODE_MASK 0xf
@@ -106,6 +108,7 @@
#define QP1C_CFGN_OFFSET 0x28
#define PHY_PORT_OFFSET 0x8
+#define MTPT_IDX_SHIFT 16
#define ALL_PORT_VAL_OPEN 0x3f
#define POL_TIME_INTERVAL_VAL 0x80
#define SLEEP_TIME_INTERVAL 20
@@ -221,6 +224,106 @@ struct hns_roce_cqe {
#define CQ_DB_REQ_NOT_SOL 0
#define CQ_DB_REQ_NOT (1 << 16)
+struct hns_roce_v1_mpt_entry {
+ u32 mpt_byte_4;
+ u32 pbl_addr_l;
+ u32 mpt_byte_12;
+ u32 virt_addr_l;
+ u32 virt_addr_h;
+ u32 length;
+ u32 mpt_byte_28;
+ u32 pa0_l;
+ u32 mpt_byte_36;
+ u32 mpt_byte_40;
+ u32 mpt_byte_44;
+ u32 mpt_byte_48;
+ u32 pa4_l;
+ u32 mpt_byte_56;
+ u32 mpt_byte_60;
+ u32 mpt_byte_64;
+};
+
+#define MPT_BYTE_4_KEY_STATE_S 0
+#define MPT_BYTE_4_KEY_STATE_M (((1UL << 2) - 1) << MPT_BYTE_4_KEY_STATE_S)
+
+#define MPT_BYTE_4_KEY_S 8
+#define MPT_BYTE_4_KEY_M (((1UL << 8) - 1) << MPT_BYTE_4_KEY_S)
+
+#define MPT_BYTE_4_PAGE_SIZE_S 16
+#define MPT_BYTE_4_PAGE_SIZE_M (((1UL << 2) - 1) << MPT_BYTE_4_PAGE_SIZE_S)
+
+#define MPT_BYTE_4_MW_TYPE_S 20
+
+#define MPT_BYTE_4_MW_BIND_ENABLE_S 21
+
+#define MPT_BYTE_4_OWN_S 22
+
+#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_S 24
+#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_M \
+ (((1UL << 2) - 1) << MPT_BYTE_4_MEMORY_LOCATION_TYPE_S)
+
+#define MPT_BYTE_4_REMOTE_ATOMIC_S 26
+#define MPT_BYTE_4_LOCAL_WRITE_S 27
+#define MPT_BYTE_4_REMOTE_WRITE_S 28
+#define MPT_BYTE_4_REMOTE_READ_S 29
+#define MPT_BYTE_4_REMOTE_INVAL_ENABLE_S 30
+#define MPT_BYTE_4_ADDRESS_TYPE_S 31
+
+#define MPT_BYTE_12_PBL_ADDR_H_S 0
+#define MPT_BYTE_12_PBL_ADDR_H_M \
+ (((1UL << 17) - 1) << MPT_BYTE_12_PBL_ADDR_H_S)
+
+#define MPT_BYTE_12_MW_BIND_COUNTER_S 17
+#define MPT_BYTE_12_MW_BIND_COUNTER_M \
+ (((1UL << 15) - 1) << MPT_BYTE_12_MW_BIND_COUNTER_S)
+
+#define MPT_BYTE_28_PD_S 0
+#define MPT_BYTE_28_PD_M (((1UL << 16) - 1) << MPT_BYTE_28_PD_S)
+
+#define MPT_BYTE_28_L_KEY_IDX_L_S 16
+#define MPT_BYTE_28_L_KEY_IDX_L_M \
+ (((1UL << 16) - 1) << MPT_BYTE_28_L_KEY_IDX_L_S)
+
+#define MPT_BYTE_36_PA0_H_S 0
+#define MPT_BYTE_36_PA0_H_M (((1UL << 5) - 1) << MPT_BYTE_36_PA0_H_S)
+
+#define MPT_BYTE_36_PA1_L_S 8
+#define MPT_BYTE_36_PA1_L_M (((1UL << 24) - 1) << MPT_BYTE_36_PA1_L_S)
+
+#define MPT_BYTE_40_PA1_H_S 0
+#define MPT_BYTE_40_PA1_H_M (((1UL << 13) - 1) << MPT_BYTE_40_PA1_H_S)
+
+#define MPT_BYTE_40_PA2_L_S 16
+#define MPT_BYTE_40_PA2_L_M (((1UL << 16) - 1) << MPT_BYTE_40_PA2_L_S)
+
+#define MPT_BYTE_44_PA2_H_S 0
+#define MPT_BYTE_44_PA2_H_M (((1UL << 21) - 1) << MPT_BYTE_44_PA2_H_S)
+
+#define MPT_BYTE_44_PA3_L_S 24
+#define MPT_BYTE_44_PA3_L_M (((1UL << 8) - 1) << MPT_BYTE_44_PA3_L_S)
+
+#define MPT_BYTE_48_PA3_H_S 0
+#define MPT_BYTE_48_PA3_H_M (((1UL << 29) - 1) << MPT_BYTE_48_PA3_H_S)
+
+#define MPT_BYTE_56_PA4_H_S 0
+#define MPT_BYTE_56_PA4_H_M (((1UL << 5) - 1) << MPT_BYTE_56_PA4_H_S)
+
+#define MPT_BYTE_56_PA5_L_S 8
+#define MPT_BYTE_56_PA5_L_M (((1UL << 24) - 1) << MPT_BYTE_56_PA5_L_S)
+
+#define MPT_BYTE_60_PA5_H_S 0
+#define MPT_BYTE_60_PA5_H_M (((1UL << 13) - 1) << MPT_BYTE_60_PA5_H_S)
+
+#define MPT_BYTE_60_PA6_L_S 16
+#define MPT_BYTE_60_PA6_L_M (((1UL << 16) - 1) << MPT_BYTE_60_PA6_L_S)
+
+#define MPT_BYTE_64_PA6_H_S 0
+#define MPT_BYTE_64_PA6_H_M (((1UL << 21) - 1) << MPT_BYTE_64_PA6_H_S)
+
+#define MPT_BYTE_64_L_KEY_IDX_H_S 24
+#define MPT_BYTE_64_L_KEY_IDX_H_M \
+ (((1UL << 8) - 1) << MPT_BYTE_64_L_KEY_IDX_H_S)
+
struct hns_roce_wqe_ctrl_seg {
__be32 sgl_pa_h;
__be32 flag;
diff --git a/drivers/infiniband/hw/hns/hns_roce_icm.c b/drivers/infiniband/hw/hns/hns_roce_icm.c
index 4ac06be..e63a03b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_icm.c
+++ b/drivers/infiniband/hw/hns/hns_roce_icm.c
@@ -274,7 +274,7 @@ int hns_roce_unmap_icm(struct hns_roce_dev *hr_dev,
{
struct device *dev = &hr_dev->pdev->dev;
unsigned long end = 0;
- unsigned long flag;
+ unsigned long flags;
void __iomem *bt_cmd;
uint32_t bt_cmd_val[2];
u32 bt_cmd_h_val = 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_icm.h b/drivers/infiniband/hw/hns/hns_roce_icm.h
index f4f4782..de59eb7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_icm.h
+++ b/drivers/infiniband/hw/hns/hns_roce_icm.h
@@ -32,6 +32,7 @@ enum {
enum {
HNS_ROCE_ICM_PAGE_SHIFT = 12,
+ HNS_ROCE_ICM_PAGE_SIZE = 1 << HNS_ROCE_ICM_PAGE_SHIFT,
};
struct hns_roce_icm_chunk {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index ad838cf..25a83a9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -588,6 +588,8 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
(1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ULL << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ULL << IB_USER_VERBS_CMD_REG_MR) |
+ (1ULL << IB_USER_VERBS_CMD_DEREG_MR) |
(1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ULL << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) |
@@ -631,6 +633,11 @@ int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq;
ib_dev->poll_cq = hr_dev->hw->poll_cq;
+ /* MR */
+ ib_dev->get_dma_mr = hns_roce_get_dma_mr;
+ ib_dev->reg_user_mr = hns_roce_reg_user_mr;
+ ib_dev->dereg_mr = hns_roce_dereg_mr;
+
ret = ib_register_device(ib_dev, NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index edf0155..1ef3342 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -12,8 +12,36 @@
#include <linux/slab.h>
#include <linux/platform_device.h>
#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
#include "hns_roce_icm.h"
+static u32 hw_index_to_key(u32 ind)
+{
+ return (ind >> 24) | (ind << 8);
+}
+
+static u32 key_to_hw_index(u32 key)
+{
+ return (key << 24) | (key >> 8);
+}
+
+static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ int mpt_index)
+{
+ return hns_roce_cmd(hr_dev, mailbox->dma, mpt_index, 0,
+ HNS_ROCE_CMD_SW2HW_MPT, HNS_ROCE_CMD_TIME_CLASS_B);
+}
+
+static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ int mpt_index)
+{
+ return hns_roce_cmd_box(hr_dev, 0, mailbox ? mailbox->dma : 0,
+ mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
+ HNS_ROCE_CMD_TIME_CLASS_B);
+}
+
static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
u32 *seg)
{
@@ -179,6 +207,106 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
mtt->first_seg + (1 << mtt->order) - 1);
}
+int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, u64 size,
+ u32 access, int npages, struct hns_roce_mr *mr)
+{
+ u32 index = 0;
+ int ret = 0;
+ struct device *dev = &hr_dev->pdev->dev;
+
+ /* Allocate a key for mr from mr_table */
+ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
+ if (ret == -1)
+ return -ENOMEM;
+
+ mr->iova = iova; /* MR va starting addr */
+ mr->size = size; /* MR addr range */
+ mr->pd = pd; /* MR num */
+ mr->access = access; /* MR access permit */
+ mr->enabled = 0; /* MR active status */
+ mr->key = hw_index_to_key(index); /* MR key */
+
+ if (size == ~0ull) {
+ mr->type = MR_TYPE_DMA;
+ mr->pbl_buf = NULL;
+ mr->pbl_dma_addr = 0;
+ } else {
+ mr->type = MR_TYPE_MR;
+ mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+ &(mr->pbl_dma_addr),
+ GFP_KERNEL);
+ if (!mr->pbl_buf) {
+ dev_err(dev, "alloc coherent pbl pages failed.\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ int ret;
+
+ if (mr->enabled) {
+ ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
+ & (hr_dev->caps.num_mtpts - 1));
+ if (ret)
+ dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+ }
+
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+ key_to_hw_index(mr->key));
+}
+
+int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ int ret;
+ int mtpt_idx = key_to_hw_index(mr->key);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+
+ /* Prepare ICM entry memory */
+ ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+ if (ret)
+ return ret;
+
+ /* Applicate mailbox memory */
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto err_table;
+ }
+
+ ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+ if (ret) {
+ dev_err(dev, "Write mtpt fail!\n");
+ goto err_page;
+ }
+
+ ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ if (ret) {
+ dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
+ goto err_cmd;
+ }
+
+ mr->enabled = 1;
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+
+err_cmd:
+err_page:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+err_table:
+ hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+ return ret;
+}
+
static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, u32 start_index,
u32 npages, u64 *page_list)
@@ -307,6 +435,38 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
}
+struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ int ret = 0;
+ struct hns_roce_mr *mr = NULL;
+
+ mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+ if (mr == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ /* Allocate memory region key */
+ ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
+ ~0ULL, acc, 0, mr);
+ if (ret)
+ goto err_free;
+
+ ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
+ if (ret)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_mr:
+ hns_roce_mr_free(to_hr_dev(pd->device), mr);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, struct ib_umem *umem)
{
@@ -345,3 +505,95 @@ out:
free_page((unsigned long) pages);
return ret;
}
+
+static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr,
+ struct ib_umem *umem)
+{
+ int i = 0;
+ int entry;
+ struct scatterlist *sg;
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12;
+ i++;
+ }
+
+ /* Memory barrier */
+ mb();
+
+ return 0;
+}
+
+struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_mr *mr = NULL;
+ int ret = 0;
+ int n = 0;
+
+ mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ access_flags, 0);
+ if (IS_ERR(mr->umem)) {
+ ret = PTR_ERR(mr->umem);
+ goto err_free;
+ }
+
+ n = ib_umem_page_count(mr->umem);
+ if (mr->umem->page_size != HNS_ROCE_ICM_PAGE_SIZE) {
+ dev_err(dev, "Just support 4K page size but is 0x%x now!\n",
+ mr->umem->page_size);
+ }
+
+ if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
+ dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n",
+ length);
+ goto err_free;
+ }
+
+ ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
+ access_flags, n, mr);
+ if (ret)
+ goto err_umem;
+
+ ret = hns_roce_ib_umem_write_mr(mr, mr->umem);
+ if (ret)
+ goto err_mr;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+
+ return &mr->ibmr;
+
+err_mr:
+ hns_roce_mr_free(hr_dev, mr);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+int hns_roce_dereg_mr(struct ib_mr *ibmr)
+{
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+ hns_roce_mr_free(to_hr_dev(ibmr->device), mr);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 7401c1a..17ac9f4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -13,6 +13,7 @@
#include <rdma/ib_pack.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
#include "hns_roce_icm.h"
#include "hns_roce_user.h"
--
1.9.1
^ permalink raw reply related
* [PATCH v5 20/21] IB/hns: Kconfig and Makefile for RoCE module
From: Lijun Ou @ 2016-04-23 10:26 UTC (permalink / raw)
To: dledford, sean.hefty, hal.rosenstock, davem, jeffrey.t.kirsher,
jiri, ogerlitz, linuxarm
Cc: linux-rdma, linux-kernel, netdev, gongyangming, xiaokun,
tangchaofei, oulijun, haifeng.wei, yisen.zhuang, yankejian,
lisheng011, charles.chenxin
In-Reply-To: <1461407219-72027-1-git-send-email-oulijun@huawei.com>
This patch add Kconfig and Makefile for building RoCE module.
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu(Xavier) <xavier.huwei@huawei.com>
---
drivers/infiniband/Kconfig | 1 +
drivers/infiniband/hw/Makefile | 1 +
drivers/infiniband/hw/hns/Kconfig | 10 ++++++++++
drivers/infiniband/hw/hns/Makefile | 9 +++++++++
4 files changed, 21 insertions(+)
create mode 100644 drivers/infiniband/hw/hns/Kconfig
create mode 100644 drivers/infiniband/hw/hns/Makefile
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 6425c0e..726a4ca 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -74,6 +74,7 @@ source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
+source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index c7ad0a4..223eb78 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
+obj-$(CONFIG_INFINIBAND_HISILICON_HNS) += hns/
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
new file mode 100644
index 0000000..c47c168
--- /dev/null
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -0,0 +1,10 @@
+config INFINIBAND_HISILICON_HNS
+ tristate "Hisilicon Hns ROCE Driver"
+ depends on NET_VENDOR_HISILICON
+ depends on ARM64 && HNS && HNS_DSAF && HNS_ENET
+ ---help---
+ This is a ROCE/RDMA driver for the Hisilicon RoCE engine. The engine
+ is used in Hisilicon Hi1610 and more further ICT SoC.
+
+ To compile this driver as a module, choose M here: the module
+ will be called hns-roce.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
new file mode 100644
index 0000000..404a700
--- /dev/null
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the HISILICON RoCE drivers.
+#
+
+obj-$(CONFIG_INFINIBAND_HISILICON_HNS) += hns-roce.o
+hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \
+ hns_roce_ah.o hns_roce_icm.o hns_roce_mr.o hns_roce_qp.o \
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_hw_v1.o
+
--
1.9.1
^ permalink raw reply related
* [PATCH] net: dummy: remove note about being Y by default
From: Ivan Babrou @ 2016-04-23 12:58 UTC (permalink / raw)
To: netdev; +Cc: trivial, David S. Miller, David Ahern, John W. Linville
Signed-off-by: Ivan Babrou <ivan@cloudflare.com>
---
drivers/net/Kconfig | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 2a1ba62b..bd86a23 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -62,9 +62,8 @@ config DUMMY
this device is consigned into oblivion) with a configurable IP
address. It is most commonly used in order to make your currently
inactive SLIP address seem like a real address for local programs.
- If you use SLIP or PPP, you might want to say Y here. Since this
- thing often comes in handy, the default is Y. It won't enlarge your
- kernel either. What a deal. Read about it in the Network
+ If you use SLIP or PPP, you might want to say Y here. It won't
+ enlarge your kernel. What a deal. Read about it in the Network
Administrator's Guide, available from
<http://www.tldp.org/docs.html#guide>.
--
2.8.1
^ permalink raw reply related
* Re: [PATCH net-next 12/12] net/mlx5e: Disable link up on INIT HCA command
From: Ido Schimmel @ 2016-04-23 16:00 UTC (permalink / raw)
To: Saeed Mahameed
Cc: David S. Miller, netdev, Or Gerlitz, Tal Alon, Eran Ben Elisha,
Eugenia Emantayev, jiri, eladr
In-Reply-To: <1461351647-27412-13-git-send-email-saeedm@mellanox.com>
Hi,
Fri, Apr 22, 2016 at 10:00:47PM IDT, saeedm@mellanox.com wrote:
>From: Eran Ben Elisha <eranbe@mellanox.com>
>
>Disable link up when initializing the HCA. Link up/down will be changed
>using (Ports Administrative and Operational Status Register) PAOS
>commands.
>
>If link layer is Ethernet, up/down the link in ndo_open/stop. If link
>layer is IB, up/down the link as part of the mlx5 IB add/remove flow.
>
>Before this patch the link was up when the HCA was initialized, now the
>driver will manage the link.
>
>Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
>Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>---
mutt suggests I sent you the following patches a while ago:
net/mlx5e: Set port administrative status in ndo_{open, close}
net/mlx5e: Initialize port administrative status to down
So I'm curious as to why you didn't follow up on them and instead came
up with this patch.
When I initially debugged this I pointed you to the fact that the
firmware also needs to be patched, as setting the administrative status
down via PAOS doesn't really do anything. The operational status will
remain up. I just tested this patchset with the latest release firmware
(12.14.2036) and I'm bumping into the same problem.
Until this is properly fixed the mlx5 driver is blacklisted in our test:
https://github.com/jpirko/lnst/blob/master/recipes/switchdev/basic-001-links.py#L19
Also, why is this directed at net-next?
> drivers/infiniband/hw/mlx5/main.c | 11 +++++++++++
> drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++
> drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 4 ++--
> .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 ++--
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++
> drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++++
> drivers/net/ethernet/mellanox/mlx5/core/port.c | 5 +++--
> include/linux/mlx5/port.h | 3 ++-
> 8 files changed, 33 insertions(+), 7 deletions(-)
>
[...]
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
>index b2db180..f083797 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
>@@ -202,12 +202,12 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
>
> mlx5_query_port_admin_status(mdev, &ps);
> if (ps == MLX5_PORT_UP)
>- mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
>+ mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN, 1);
Why is this needed? The link doesn't need to go through training when
setting PFC. It's only needed for PAUSE frames when auto-negotiation is
on, but you don't support that anyway.
Thanks.
>
> ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
>
> if (ps == MLX5_PORT_UP)
>- mlx5_set_port_admin_status(mdev, MLX5_PORT_UP);
>+ mlx5_set_port_admin_status(mdev, MLX5_PORT_UP, 1);
>
> return ret;
> }
^ permalink raw reply
* Re: [PATCH net-next 2/9] libnl: nla_put_le64(): align on a 64-bit area
From: Alexander Aring @ 2016-04-23 17:05 UTC (permalink / raw)
To: Eric Dumazet, Nicolas Dichtel
Cc: netdev, davem, linux-kernel, linux-wpan, pablo, kaber, kadlec,
pshelar, kuznet, jmorris, yoshfuji, netfilter-devel, dev,
steffen.klassert, herbert, bsingharora
In-Reply-To: <1461343880.7627.34.camel@edumazet-glaptop3.roam.corp.google.com>
Hi,
On 04/22/2016 06:51 PM, Eric Dumazet wrote:
> On Fri, 2016-04-22 at 17:31 +0200, Nicolas Dichtel wrote:
>> nla_data() is now aligned on a 64-bit area.
>>
>> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
>> ---
>> include/net/netlink.h | 8 +++++---
>> include/net/nl802154.h | 6 ++++++
>> net/ieee802154/nl802154.c | 13 ++++++++-----
>> 3 files changed, 19 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/net/netlink.h b/include/net/netlink.h
>> index 6f51a8a06498..7f6b99483ab7 100644
>> --- a/include/net/netlink.h
>> +++ b/include/net/netlink.h
>> @@ -878,14 +878,16 @@ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value)
>> }
>>
>> /**
>> - * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer
>> + * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer and align it
>> * @skb: socket buffer to add attribute to
>> * @attrtype: attribute type
>> * @value: numeric value
>> + * @padattr: attribute type for the padding
>> */
>> -static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value)
>> +static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value,
>> + int padattr)
>> {
>> - return nla_put(skb, attrtype, sizeof(__le64), &value);
>> + return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr);
>> }
>>
>
> But _why_ is it needed ?
>
> nla_put() has no alignment assumptions, it simply copies 8 bytes.
>
> Seems this is going too far.
>
if this is really needed, then nla_put_u64/be64/etc need to be changed also,
or? The function nla_put_u64 is the same like nla_put_le64 but with __le64
type, so sparse will not complain about that.
- Alex
^ permalink raw reply
* [PATCH] off-by-one in DecodeQ931
From: Toby DiPasquale @ 2016-04-23 17:08 UTC (permalink / raw)
To: pablo, Patrick McHardy, kadlec, davem, netfilter-devel, coreteam,
netdev
I was reviewing the H.323 conntrack helper in the kernel when I came
across what appears to be an off-by-one error in the DecodeQ931
function. The MessageType field of the Q931 record is assigned and p
is incremented, but the corresponding decrement to sz is missing,
leading the sz variable to be one more than it should be. This patch
decrements sz so it is the proper value going into the parsing of the
information elements.
Signed-off-by: Toby DiPasquale <toby@cbcg.net>
--
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c
b/net/netfilter/nf_conntrack_h323_asn1.c
index bcd5ed6..68b1557 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -849,6 +849,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
if (sz < 1)
return H323_ERROR_BOUND;
q931->MessageType = *p++;
+ sz--;
PRINT("MessageType = %02X\n", q931->MessageType);
if (*p & 0x80) {
p++;
--
Toby DiPasquale
^ permalink raw reply related
* Re: [PATCH net-next 2/9] libnl: nla_put_le64(): align on a 64-bit area
From: Alexander Aring @ 2016-04-23 17:28 UTC (permalink / raw)
To: Eric Dumazet, Nicolas Dichtel
Cc: netdev, davem, linux-kernel, linux-wpan, pablo, kaber, kadlec,
pshelar, kuznet, jmorris, yoshfuji, netfilter-devel, dev,
steffen.klassert, herbert, bsingharora
In-Reply-To: <60b94d1a-23a2-ec18-7f2b-2ba21946af03@pengutronix.de>
Hi,
On 04/23/2016 07:05 PM, Alexander Aring wrote:
...
>
> if this is really needed, then nla_put_u64/be64/etc need to be changed also,
Okay, I found PATCH 3/9 do it for be64, but what's about u64?
- Alex
^ permalink raw reply
* [PATCH net] net/mlx4_en: fix spurious timestamping callbacks
From: Eric Dumazet @ 2016-04-23 18:35 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Eugenia Emantayev
From: Eric Dumazet <edumazet@google.com>
When multiple skb are TX-completed in a row, we might incorrectly keep
a timestamp of a prior skb and cause extra work.
Fixes: ec693d47010e8 ("net/mlx4_en: Add HW timestamping (TS) support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Willem de Bruijn <willemb@google.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index c0d7b7296236..a386f047c1af 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -405,7 +405,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
u32 packets = 0;
u32 bytes = 0;
int factor = priv->cqe_factor;
- u64 timestamp = 0;
int done = 0;
int budget = priv->tx_work_limit;
u32 last_nr_txbb;
@@ -445,9 +444,12 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
do {
+ u64 timestamp = 0;
+
txbbs_skipped += last_nr_txbb;
ring_index = (ring_index + last_nr_txbb) & size_mask;
- if (ring->tx_info[ring_index].ts_requested)
+
+ if (unlikely(ring->tx_info[ring_index].ts_requested))
timestamp = mlx4_en_get_cqe_ts(cqe);
/* free next descriptor */
^ permalink raw reply related
* [PATCH net-next 2/3] ila: xlat changes
From: Tom Herbert @ 2016-04-23 18:46 UTC (permalink / raw)
To: davem, netdev; +Cc: kernel-team
In-Reply-To: <1461437217-1568717-1-git-send-email-tom@herbertland.com>
Change model of xlat to be used only for input where lookup is done on
the locator part of an address (comparing to locator_match as key
in rhashtable). This is needed for checksum neutral translation
which obfuscates the low order 16 bits of the identifier. It also
permits hosts to be in muliple ILA domains (each locator can map
to a different SIR address). A check is also added to disallow
translating non-ILA addresses (check of type in identifier).
Signed-off-by: Tom Herbert <tom@herbertland.com>
---
net/ipv6/ila/ila_xlat.c | 102 ++++++++++++++++--------------------------------
1 file changed, 34 insertions(+), 68 deletions(-)
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 075c782..d17d429 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -11,9 +11,7 @@
struct ila_xlat_params {
struct ila_params ip;
- struct ila_identifier identifier;
int ifindex;
- unsigned int dir;
};
struct ila_map {
@@ -66,35 +64,29 @@ static __always_inline void __ila_hash_secret_init(void)
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static inline u32 ila_identifier_hash(struct ila_identifier ident)
+static inline u32 ila_locator_hash(struct ila_locator loc)
{
- u32 *v = (u32 *)ident.v32;
+ u32 *v = (u32 *)loc.v32;
return jhash_2words(v[0], v[1], hashrnd);
}
static inline spinlock_t *ila_get_lock(struct ila_net *ilan,
- struct ila_identifier ident)
+ struct ila_locator loc)
{
- return &ilan->locks[ila_identifier_hash(ident) & ilan->locks_mask];
+ return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask];
}
static inline int ila_cmp_wildcards(struct ila_map *ila,
- struct ila_addr *iaddr, int ifindex,
- unsigned int dir)
+ struct ila_addr *iaddr, int ifindex)
{
- return (ila->xp.ip.locator_match.v64 &&
- ila->xp.ip.locator_match.v64 != iaddr->loc.v64) ||
- (ila->xp.ifindex && ila->xp.ifindex != ifindex) ||
- !(ila->xp.dir & dir);
+ return (ila->xp.ifindex && ila->xp.ifindex != ifindex);
}
static inline int ila_cmp_params(struct ila_map *ila,
struct ila_xlat_params *xp)
{
- return (ila->xp.ip.locator_match.v64 != xp->ip.locator_match.v64) ||
- (ila->xp.ifindex != xp->ifindex) ||
- (ila->xp.dir != xp->dir);
+ return (ila->xp.ifindex != xp->ifindex);
}
static int ila_cmpfn(struct rhashtable_compare_arg *arg,
@@ -102,16 +94,13 @@ static int ila_cmpfn(struct rhashtable_compare_arg *arg,
{
const struct ila_map *ila = obj;
- return (ila->xp.identifier.v64 != *(__be64 *)arg->key);
+ return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key);
}
static inline int ila_order(struct ila_map *ila)
{
int score = 0;
- if (ila->xp.ip.locator_match.v64)
- score += 1 << 0;
-
if (ila->xp.ifindex)
score += 1 << 1;
@@ -121,7 +110,7 @@ static inline int ila_order(struct ila_map *ila)
static const struct rhashtable_params rht_params = {
.nelem_hint = 1024,
.head_offset = offsetof(struct ila_map, node),
- .key_offset = offsetof(struct ila_map, xp.identifier),
+ .key_offset = offsetof(struct ila_map, xp.ip.locator_match),
.key_len = sizeof(u64), /* identifier */
.max_size = 1048576,
.min_size = 256,
@@ -140,11 +129,9 @@ static struct genl_family ila_nl_family = {
};
static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
- [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
- [ILA_ATTR_DIR] = { .type = NLA_U32, },
};
static int parse_nl_config(struct genl_info *info,
@@ -152,10 +139,6 @@ static int parse_nl_config(struct genl_info *info,
{
memset(xp, 0, sizeof(*xp));
- if (info->attrs[ILA_ATTR_IDENTIFIER])
- xp->identifier.v64 = (__force __be64)nla_get_u64(
- info->attrs[ILA_ATTR_IDENTIFIER]);
-
if (info->attrs[ILA_ATTR_LOCATOR])
xp->ip.locator.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR]);
@@ -167,24 +150,20 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
- if (info->attrs[ILA_ATTR_DIR])
- xp->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]);
-
return 0;
}
/* Must be called with rcu readlock */
static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr,
int ifindex,
- unsigned int dir,
struct ila_net *ilan)
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->ident,
+ ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc,
rht_params);
while (ila) {
- if (!ila_cmp_wildcards(ila, iaddr, ifindex, dir))
+ if (!ila_cmp_wildcards(ila, iaddr, ifindex))
return ila;
ila = rcu_access_pointer(ila->next);
}
@@ -198,7 +177,8 @@ static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp,
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &xp->identifier,
+ ila = rhashtable_lookup_fast(&ilan->rhash_table,
+ &xp->ip.locator_match,
rht_params);
while (ila) {
if (!ila_cmp_params(ila, xp))
@@ -226,14 +206,14 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb, int dir);
+static int ila_xlat_addr(struct sk_buff *skb);
static unsigned int
ila_nf_input(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- ila_xlat_addr(skb, ILA_DIR_IN);
+ ila_xlat_addr(skb);
return NF_ACCEPT;
}
@@ -250,7 +230,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head;
- spinlock_t *lock = ila_get_lock(ilan, xp->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = 0, order;
if (!ilan->hooks_registered) {
@@ -271,20 +251,19 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
ila->xp = *xp;
- if (xp->ip.locator_match.v64) {
- /* Precompute checksum difference for translation since we
- * know both the old identifier and the new one.
- */
- ila->xp.ip.csum_diff = compute_csum_diff8(
- (__be32 *)&xp->ip.locator_match,
- (__be32 *)&xp->ip.locator);
- }
+ /* Precompute checksum difference for translation since we
+ * know both the old identifier and the new one.
+ */
+ ila->xp.ip.csum_diff = compute_csum_diff8(
+ (__be32 *)&xp->ip.locator_match,
+ (__be32 *)&xp->ip.locator);
order = ila_order(ila);
spin_lock(lock);
- head = rhashtable_lookup_fast(&ilan->rhash_table, &xp->identifier,
+ head = rhashtable_lookup_fast(&ilan->rhash_table,
+ &xp->ip.locator_match,
rht_params);
if (!head) {
/* New entry for the rhash_table */
@@ -335,13 +314,13 @@ static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head, *prev;
- spinlock_t *lock = ila_get_lock(ilan, xp->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = -ENOENT;
spin_lock(lock);
head = rhashtable_lookup_fast(&ilan->rhash_table,
- &xp->identifier, rht_params);
+ &xp->ip.locator_match, rht_params);
ila = head;
prev = NULL;
@@ -423,14 +402,11 @@ static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
{
- if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER,
- (__force u64)ila->xp.identifier.v64) ||
- nla_put_u64(msg, ILA_ATTR_LOCATOR,
+ if (nla_put_u64(msg, ILA_ATTR_LOCATOR,
(__force u64)ila->xp.ip.locator.v64) ||
nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH,
(__force u64)ila->xp.ip.locator_match.v64) ||
- nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_DIR, ila->xp.dir))
+ nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex))
return -1;
return 0;
@@ -619,22 +595,24 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb, int dir)
+static int ila_xlat_addr(struct sk_buff *skb)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
- size_t nhoff;
/* Assumes skb contains a valid IPv6 header that is pulled */
- nhoff = sizeof(struct ipv6hdr);
+ if (!ila_addr_is_ila(iaddr)) {
+ /* Type indicates this is not an ILA address */
+ return 0;
+ }
rcu_read_lock();
- ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, dir, ilan);
+ ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
ila_update_ipv6_locator(skb, &ila->xp.ip);
@@ -643,18 +621,6 @@ static int ila_xlat_addr(struct sk_buff *skb, int dir)
return 0;
}
-int ila_xlat_incoming(struct sk_buff *skb)
-{
- return ila_xlat_addr(skb, ILA_DIR_IN);
-}
-EXPORT_SYMBOL(ila_xlat_incoming);
-
-int ila_xlat_outgoing(struct sk_buff *skb)
-{
- return ila_xlat_addr(skb, ILA_DIR_OUT);
-}
-EXPORT_SYMBOL(ila_xlat_outgoing);
-
int ila_xlat_init(void)
{
int ret;
--
2.8.0.rc2
^ permalink raw reply related
* [PATCH net-next 0/3] ila: Support for checksum neutral translations
From: Tom Herbert @ 2016-04-23 18:46 UTC (permalink / raw)
To: davem, netdev; +Cc: kernel-team
This patch set updates ILA to support draft-herbert-nvo3-ila-02. The
primary addition is support checksum neutral ILA translation.
This allows address to be performed and still keep any transport
layer checksums that include the addresses in their pseudo header to
still be correct without the translator needing to parse L4.
Other items are:
- Structures for ILA addresses, identifiers, locators
- Disallow translation on non-ILA addresses (check by
type in identifier).
- Change xlat (nf_input) to translates solely based
on matching locators not identifiers (since identifiers
are not obfuscated by checksum neutral).
- Side effect if above is that multiple ILA domains are
supported. Each local locator can map to a different
SIR address (ILA domain), and each domain defines its
own identifier space.
Tested: Ran TCP_RR with 200 cnxs. ILA performance is slightly better
than previously since we are not longer parsing L4 for checksum
handling. I amd seeing about 1% performance overhead. Also ran
TCP_STREAM and tested non-ILA address (type=0) are not translated.
v2: Fix complilation errors
Tom Herbert (3):
ila: Add struct definitions and helpers
ila: xlat changes
ila: add checksum neutral ILA translations
include/uapi/linux/ila.h | 7 ++
net/ipv6/ila/ila.h | 79 ++++++++++++++++++++++-
net/ipv6/ila/ila_common.c | 81 +++++++++++++++++++++--
net/ipv6/ila/ila_lwt.c | 49 ++++++++++----
net/ipv6/ila/ila_xlat.c | 160 +++++++++++++++++++---------------------------
5 files changed, 259 insertions(+), 117 deletions(-)
--
2.8.0.rc2
^ permalink raw reply
* [PATCH net-next 1/3] ila: Add struct definitions and helpers
From: Tom Herbert @ 2016-04-23 18:46 UTC (permalink / raw)
To: davem, netdev; +Cc: kernel-team
In-Reply-To: <1461437217-1568717-1-git-send-email-tom@herbertland.com>
Add structures for identifiers, locators, and an ila address which
is composed of a locator and identifier and in6_addr can be cast to
it. This includes a three bit type field and enums for the types defined
in ILA I-D.
In ILA lwt don't allow user to set a translation for a non-ILA
address (type of identifier is zero meaning it is an IID). This also
requires that the destination prefix is at least 65 bytes (64
bit locator and first byte of identifier).
Signed-off-by: Tom Herbert <tom@herbertland.com>
---
net/ipv6/ila/ila.h | 67 ++++++++++++++++++++++--
net/ipv6/ila/ila_common.c | 11 ++--
net/ipv6/ila/ila_lwt.c | 39 +++++++++-----
net/ipv6/ila/ila_xlat.c | 126 +++++++++++++++++++++++-----------------------
4 files changed, 161 insertions(+), 82 deletions(-)
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index 28542cb..f532967 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -23,9 +23,70 @@
#include <net/protocol.h>
#include <uapi/linux/ila.h>
+struct ila_locator {
+ union {
+ __u8 v8[8];
+ __be16 v16[4];
+ __be32 v32[2];
+ __be64 v64;
+ };
+};
+
+struct ila_identifier {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 __space:5;
+ u8 type:3;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 type:3;
+ u8 __space:5;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+ u8 __space2[7];
+ };
+ __u8 v8[8];
+ __be16 v16[4];
+ __be32 v32[2];
+ __be64 v64;
+ };
+};
+
+enum {
+ ILA_ATYPE_IID = 0,
+ ILA_ATYPE_LUID,
+ ILA_ATYPE_VIRT_V4,
+ ILA_ATYPE_VIRT_UNI_V6,
+ ILA_ATYPE_VIRT_MULTI_V6,
+ ILA_ATYPE_RSVD_1,
+ ILA_ATYPE_RSVD_2,
+ ILA_ATYPE_RSVD_3,
+};
+
+struct ila_addr {
+ union {
+ struct in6_addr addr;
+ struct {
+ struct ila_locator loc;
+ struct ila_identifier ident;
+ };
+ };
+};
+
+static inline struct ila_addr *ila_a2i(struct in6_addr *addr)
+{
+ return (struct ila_addr *)addr;
+}
+
+static inline bool ila_addr_is_ila(struct ila_addr *iaddr)
+{
+ return (iaddr->ident.type != ILA_ATYPE_IID);
+}
+
struct ila_params {
- __be64 locator;
- __be64 locator_match;
+ struct ila_locator locator;
+ struct ila_locator locator_match;
__wsum csum_diff;
};
@@ -38,7 +99,7 @@ static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
return csum_partial(diff, sizeof(diff), 0);
}
-void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
int ila_lwt_init(void);
void ila_lwt_fini(void);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 3061305..c3078d0 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -15,17 +15,20 @@
static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
{
- if (*(__be64 *)&ip6h->daddr == p->locator_match)
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ if (iaddr->loc.v64 == p->locator_match.v64)
return p->csum_diff;
else
- return compute_csum_diff8((__be32 *)&ip6h->daddr,
+ return compute_csum_diff8((__be32 *)&iaddr->loc,
(__be32 *)&p->locator);
}
-void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
{
__wsum diff;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
size_t nhoff = sizeof(struct ipv6hdr);
/* First update checksum */
@@ -68,7 +71,7 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
}
/* Now change destination address */
- *(__be64 *)&ip6h->daddr = p->locator;
+ iaddr->loc = p->locator;
}
static int __init ila_init(void)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 2ae3c4f..71c79fc 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
return dst->lwtstate->orig_output(net, sk, skb);
@@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
return dst->lwtstate->orig_input(skb);
@@ -64,11 +64,26 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
size_t encap_len = sizeof(*p);
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
+ struct ila_addr *iaddr;
int ret;
if (family != AF_INET6)
return -EINVAL;
+ if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) {
+ /* Need to have full locator and at least type field
+ * included in destination
+ */
+ return -EINVAL;
+ }
+
+ iaddr = (struct ila_addr *)&cfg6->fc_dst;
+
+ if (!ila_addr_is_ila(iaddr)) {
+ /* Don't allow setting a translation for a non-ILA address */
+ return -EINVAL;
+ }
+
ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
ila_nl_policy);
if (ret < 0)
@@ -84,16 +99,14 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
newts->len = encap_len;
p = ila_params_lwtunnel(newts);
- p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
+ p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
- if (cfg6->fc_dst_len > sizeof(__be64)) {
- /* Precompute checksum difference for translation since we
- * know both the old locator and the new one.
- */
- p->locator_match = *(__be64 *)&cfg6->fc_dst;
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match, (__be32 *)&p->locator);
- }
+ /* Precompute checksum difference for translation since we
+ * know both the old locator and the new one.
+ */
+ p->locator_match = iaddr->loc;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator_match, (__be32 *)&p->locator);
newts->type = LWTUNNEL_ENCAP_ILA;
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
@@ -109,7 +122,7 @@ static int ila_fill_encap_info(struct sk_buff *skb,
{
struct ila_params *p = ila_params_lwtunnel(lwtstate);
- if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator))
+ if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64))
goto nla_put_failure;
return 0;
@@ -129,7 +142,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct ila_params *a_p = ila_params_lwtunnel(a);
struct ila_params *b_p = ila_params_lwtunnel(b);
- return (a_p->locator != b_p->locator);
+ return (a_p->locator.v64 != b_p->locator.v64);
}
static const struct lwtunnel_encap_ops ila_encap_ops = {
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 0b03533..075c782 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -11,13 +11,13 @@
struct ila_xlat_params {
struct ila_params ip;
- __be64 identifier;
+ struct ila_identifier identifier;
int ifindex;
unsigned int dir;
};
struct ila_map {
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
struct rhash_head node;
struct ila_map __rcu *next;
struct rcu_head rcu;
@@ -66,31 +66,35 @@ static __always_inline void __ila_hash_secret_init(void)
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static inline u32 ila_identifier_hash(__be64 identifier)
+static inline u32 ila_identifier_hash(struct ila_identifier ident)
{
- u32 *v = (u32 *)&identifier;
+ u32 *v = (u32 *)ident.v32;
return jhash_2words(v[0], v[1], hashrnd);
}
-static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier)
+static inline spinlock_t *ila_get_lock(struct ila_net *ilan,
+ struct ila_identifier ident)
{
- return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask];
+ return &ilan->locks[ila_identifier_hash(ident) & ilan->locks_mask];
}
-static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc,
- int ifindex, unsigned int dir)
+static inline int ila_cmp_wildcards(struct ila_map *ila,
+ struct ila_addr *iaddr, int ifindex,
+ unsigned int dir)
{
- return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) ||
- (ila->p.ifindex && ila->p.ifindex != ifindex) ||
- !(ila->p.dir & dir);
+ return (ila->xp.ip.locator_match.v64 &&
+ ila->xp.ip.locator_match.v64 != iaddr->loc.v64) ||
+ (ila->xp.ifindex && ila->xp.ifindex != ifindex) ||
+ !(ila->xp.dir & dir);
}
-static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p)
+static inline int ila_cmp_params(struct ila_map *ila,
+ struct ila_xlat_params *xp)
{
- return (ila->p.ip.locator_match != p->ip.locator_match) ||
- (ila->p.ifindex != p->ifindex) ||
- (ila->p.dir != p->dir);
+ return (ila->xp.ip.locator_match.v64 != xp->ip.locator_match.v64) ||
+ (ila->xp.ifindex != xp->ifindex) ||
+ (ila->xp.dir != xp->dir);
}
static int ila_cmpfn(struct rhashtable_compare_arg *arg,
@@ -98,17 +102,17 @@ static int ila_cmpfn(struct rhashtable_compare_arg *arg,
{
const struct ila_map *ila = obj;
- return (ila->p.identifier != *(__be64 *)arg->key);
+ return (ila->xp.identifier.v64 != *(__be64 *)arg->key);
}
static inline int ila_order(struct ila_map *ila)
{
int score = 0;
- if (ila->p.ip.locator_match)
+ if (ila->xp.ip.locator_match.v64)
score += 1 << 0;
- if (ila->p.ifindex)
+ if (ila->xp.ifindex)
score += 1 << 1;
return score;
@@ -117,7 +121,7 @@ static inline int ila_order(struct ila_map *ila)
static const struct rhashtable_params rht_params = {
.nelem_hint = 1024,
.head_offset = offsetof(struct ila_map, node),
- .key_offset = offsetof(struct ila_map, p.identifier),
+ .key_offset = offsetof(struct ila_map, xp.identifier),
.key_len = sizeof(u64), /* identifier */
.max_size = 1048576,
.min_size = 256,
@@ -144,42 +148,43 @@ static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
};
static int parse_nl_config(struct genl_info *info,
- struct ila_xlat_params *p)
+ struct ila_xlat_params *xp)
{
- memset(p, 0, sizeof(*p));
+ memset(xp, 0, sizeof(*xp));
if (info->attrs[ILA_ATTR_IDENTIFIER])
- p->identifier = (__force __be64)nla_get_u64(
+ xp->identifier.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_IDENTIFIER]);
if (info->attrs[ILA_ATTR_LOCATOR])
- p->ip.locator = (__force __be64)nla_get_u64(
+ xp->ip.locator.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR]);
if (info->attrs[ILA_ATTR_LOCATOR_MATCH])
- p->ip.locator_match = (__force __be64)nla_get_u64(
+ xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
if (info->attrs[ILA_ATTR_IFINDEX])
- p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
+ xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
if (info->attrs[ILA_ATTR_DIR])
- p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]);
+ xp->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]);
return 0;
}
/* Must be called with rcu readlock */
-static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
+static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr,
int ifindex,
unsigned int dir,
struct ila_net *ilan)
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params);
+ ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->ident,
+ rht_params);
while (ila) {
- if (!ila_cmp_wildcards(ila, loc, ifindex, dir))
+ if (!ila_cmp_wildcards(ila, iaddr, ifindex, dir))
return ila;
ila = rcu_access_pointer(ila->next);
}
@@ -188,15 +193,15 @@ static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
}
/* Must be called with rcu readlock */
-static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p,
+static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp,
struct ila_net *ilan)
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ ila = rhashtable_lookup_fast(&ilan->rhash_table, &xp->identifier,
rht_params);
while (ila) {
- if (!ila_cmp_params(ila, p))
+ if (!ila_cmp_params(ila, xp))
return ila;
ila = rcu_access_pointer(ila->next);
}
@@ -241,11 +246,11 @@ static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
},
};
-static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
+static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head;
- spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->identifier);
int err = 0, order;
if (!ilan->hooks_registered) {
@@ -264,22 +269,22 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
if (!ila)
return -ENOMEM;
- ila->p = *p;
+ ila->xp = *xp;
- if (p->ip.locator_match) {
+ if (xp->ip.locator_match.v64) {
/* Precompute checksum difference for translation since we
* know both the old identifier and the new one.
*/
- ila->p.ip.csum_diff = compute_csum_diff8(
- (__be32 *)&p->ip.locator_match,
- (__be32 *)&p->ip.locator);
+ ila->xp.ip.csum_diff = compute_csum_diff8(
+ (__be32 *)&xp->ip.locator_match,
+ (__be32 *)&xp->ip.locator);
}
order = ila_order(ila);
spin_lock(lock);
- head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ head = rhashtable_lookup_fast(&ilan->rhash_table, &xp->identifier,
rht_params);
if (!head) {
/* New entry for the rhash_table */
@@ -289,7 +294,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
struct ila_map *tila = head, *prev = NULL;
do {
- if (!ila_cmp_params(tila, p)) {
+ if (!ila_cmp_params(tila, xp)) {
err = -EEXIST;
goto out;
}
@@ -326,23 +331,23 @@ out:
return err;
}
-static int ila_del_mapping(struct net *net, struct ila_xlat_params *p)
+static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head, *prev;
- spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->identifier);
int err = -ENOENT;
spin_lock(lock);
head = rhashtable_lookup_fast(&ilan->rhash_table,
- &p->identifier, rht_params);
+ &xp->identifier, rht_params);
ila = head;
prev = NULL;
while (ila) {
- if (ila_cmp_params(ila, p)) {
+ if (ila_cmp_params(ila, xp)) {
prev = ila;
ila = rcu_dereference_protected(ila->next,
lockdep_is_held(lock));
@@ -404,14 +409,14 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
int err;
- err = parse_nl_config(info, &p);
+ err = parse_nl_config(info, &xp);
if (err)
return err;
- ila_del_mapping(net, &p);
+ ila_del_mapping(net, &xp);
return 0;
}
@@ -419,13 +424,13 @@ static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
{
if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER,
- (__force u64)ila->p.identifier) ||
+ (__force u64)ila->xp.identifier.v64) ||
nla_put_u64(msg, ILA_ATTR_LOCATOR,
- (__force u64)ila->p.ip.locator) ||
+ (__force u64)ila->xp.ip.locator.v64) ||
nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH,
- (__force u64)ila->p.ip.locator_match) ||
- nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir))
+ (__force u64)ila->xp.ip.locator_match.v64) ||
+ nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
+ nla_put_u32(msg, ILA_ATTR_DIR, ila->xp.dir))
return -1;
return 0;
@@ -457,11 +462,11 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
struct net *net = genl_info_net(info);
struct ila_net *ilan = net_generic(net, ila_net_id);
struct sk_buff *msg;
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
struct ila_map *ila;
int ret;
- ret = parse_nl_config(info, &p);
+ ret = parse_nl_config(info, &xp);
if (ret)
return ret;
@@ -471,7 +476,7 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
rcu_read_lock();
- ila = ila_lookup_by_params(&p, ilan);
+ ila = ila_lookup_by_params(&xp, ilan);
if (ila) {
ret = ila_dump_info(ila,
info->snd_portid,
@@ -620,21 +625,18 @@ static int ila_xlat_addr(struct sk_buff *skb, int dir)
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
struct ila_net *ilan = net_generic(net, ila_net_id);
- __be64 identifier, locator_match;
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
size_t nhoff;
/* Assumes skb contains a valid IPv6 header that is pulled */
- identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8];
- locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0];
nhoff = sizeof(struct ipv6hdr);
rcu_read_lock();
- ila = ila_lookup_wildcards(identifier, locator_match,
- skb->dev->ifindex, dir, ilan);
+ ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, dir, ilan);
if (ila)
- update_ipv6_locator(skb, &ila->p.ip);
+ ila_update_ipv6_locator(skb, &ila->xp.ip);
rcu_read_unlock();
--
2.8.0.rc2
^ permalink raw reply related
* [PATCH net-next 3/3] ila: add checksum neutral ILA translations
From: Tom Herbert @ 2016-04-23 18:46 UTC (permalink / raw)
To: davem, netdev; +Cc: kernel-team
In-Reply-To: <1461437217-1568717-1-git-send-email-tom@herbertland.com>
Support checksum neutral ILA as described in the ILA draft. The low
order 16 bits of the identifier are used to contain the checksum
adjustment value.
The csum-mode parameter is added to described checksum processing. There
are three values:
- adjust transport checksum (previous behavior)
- do checksum neutral mapping
- do nothing
On output the csum-mode in the ila_params is checked and acted on. If
mode is checksum neutral mapping then to mapping and set C-bit.
On input, C-bit is checked. If it is set checksum-netural mapping is
done (regardless of csum-mode in ila params) and C-bit will be cleared.
If it is not set then action in csum-mode is taken.
Signed-off-by: Tom Herbert <tom@herbertland.com>
---
include/uapi/linux/ila.h | 7 +++++
net/ipv6/ila/ila.h | 16 ++++++++--
net/ipv6/ila/ila_common.c | 74 +++++++++++++++++++++++++++++++++++++++++++++--
net/ipv6/ila/ila_lwt.c | 14 +++++++--
net/ipv6/ila/ila_xlat.c | 16 +++++-----
5 files changed, 112 insertions(+), 15 deletions(-)
diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h
index abde7bb..8ac61b8 100644
--- a/include/uapi/linux/ila.h
+++ b/include/uapi/linux/ila.h
@@ -14,6 +14,7 @@ enum {
ILA_ATTR_LOCATOR_MATCH, /* u64 */
ILA_ATTR_IFINDEX, /* s32 */
ILA_ATTR_DIR, /* u32 */
+ ILA_ATTR_CSUM_MODE, /* u8 */
__ILA_ATTR_MAX,
};
@@ -34,4 +35,10 @@ enum {
#define ILA_DIR_IN (1 << 0)
#define ILA_DIR_OUT (1 << 1)
+enum {
+ ILA_CSUM_ADJUST_TRANSPORT,
+ ILA_CSUM_NEUTRAL_MAP,
+ ILA_CSUM_NO_ACTION,
+};
+
#endif /* _UAPI_LINUX_ILA_H */
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index f532967..d08fd2d 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -36,11 +36,13 @@ struct ila_identifier {
union {
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
- u8 __space:5;
+ u8 __space:4;
+ u8 csum_neutral:1;
u8 type:3;
#elif defined(__BIG_ENDIAN_BITFIELD)
u8 type:3;
- u8 __space:5;
+ u8 csum_neutral:1;
+ u8 __space:4;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
@@ -64,6 +66,8 @@ enum {
ILA_ATYPE_RSVD_3,
};
+#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
+
struct ila_addr {
union {
struct in6_addr addr;
@@ -88,6 +92,7 @@ struct ila_params {
struct ila_locator locator;
struct ila_locator locator_match;
__wsum csum_diff;
+ u8 csum_mode;
};
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
@@ -99,8 +104,15 @@ static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
return csum_partial(diff, sizeof(diff), 0);
}
+static inline bool ila_csum_neutral_set(struct ila_identifier ident)
+{
+ return !!(ident.csum_neutral);
+}
+
void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+void ila_init_saved_csum(struct ila_params *p);
+
int ila_lwt_init(void);
void ila_lwt_fini(void);
int ila_xlat_init(void);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index c3078d0..0e94042 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -17,21 +17,50 @@ static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
{
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
- if (iaddr->loc.v64 == p->locator_match.v64)
+ if (p->locator_match.v64)
return p->csum_diff;
else
return compute_csum_diff8((__be32 *)&iaddr->loc,
(__be32 *)&p->locator);
}
-void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+static void ila_csum_do_neutral(struct ila_addr *iaddr,
+ struct ila_params *p)
+{
+ __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
+ __wsum diff, fval;
+
+ /* Check if checksum adjust value has been cached */
+ if (p->locator_match.v64) {
+ diff = p->csum_diff;
+ } else {
+ diff = compute_csum_diff8((__be32 *)iaddr,
+ (__be32 *)&p->locator);
+ }
+
+ fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
+ ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG);
+
+ diff = csum_add(diff, fval);
+
+ *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+
+ /* Flip the csum-neutral bit. Either we are doing a SIR->ILA
+ * translation with ILA_CSUM_NEUTRAL_MAP as the csum_method
+ * and the C-bit is not set, or we are doing an ILA-SIR
+ * tranlsation and the C-bit is set.
+ */
+ iaddr->ident.csum_neutral ^= 1;
+}
+
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+ struct ila_params *p)
{
__wsum diff;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
size_t nhoff = sizeof(struct ipv6hdr);
- /* First update checksum */
switch (ip6h->nexthdr) {
case NEXTHDR_TCP:
if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
@@ -74,6 +103,45 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
iaddr->loc = p->locator;
}
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ /* First deal with the transport checksum */
+ if (ila_csum_neutral_set(iaddr->ident)) {
+ /* C-bit is set in the locator indicating that this
+ * is a locator being translated to a SIR address.
+ * Perform (receiver) checksum-neutral translation.
+ */
+ ila_csum_do_neutral(iaddr, p);
+ } else {
+ switch (p->csum_mode) {
+ case ILA_CSUM_ADJUST_TRANSPORT:
+ ila_csum_adjust_transport(skb, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP:
+ ila_csum_do_neutral(iaddr, p);
+ break;
+ case ILA_CSUM_NO_ACTION:
+ break;
+ }
+ }
+
+ /* Now change destination address */
+ iaddr->loc = p->locator;
+}
+
+void ila_init_saved_csum(struct ila_params *p)
+{
+ if (!p->locator_match.v64)
+ return;
+
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator_match,
+ (__be32 *)&p->locator);
+}
+
static int __init ila_init(void)
{
int ret;
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 71c79fc..e81e39a 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -53,6 +53,7 @@ drop:
static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+ [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
};
static int ila_build_state(struct net_device *dev, struct nlattr *nla,
@@ -79,8 +80,10 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
iaddr = (struct ila_addr *)&cfg6->fc_dst;
- if (!ila_addr_is_ila(iaddr)) {
- /* Don't allow setting a translation for a non-ILA address */
+ if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
+ /* Don't allow translation for a non-ILA address or checksum
+ * neutral flag to be set.
+ */
return -EINVAL;
}
@@ -108,6 +111,11 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
p->csum_diff = compute_csum_diff8(
(__be32 *)&p->locator_match, (__be32 *)&p->locator);
+ if (tb[ILA_ATTR_CSUM_MODE])
+ p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+ ila_init_saved_csum(p);
+
newts->type = LWTUNNEL_ENCAP_ILA;
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
LWTUNNEL_STATE_INPUT_REDIRECT;
@@ -124,6 +132,8 @@ static int ila_fill_encap_info(struct sk_buff *skb,
if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64))
goto nla_put_failure;
+ if (nla_put_u64(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
+ goto nla_put_failure;
return 0;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index d17d429..c0323a2 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -132,6 +132,7 @@ static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
+ [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -147,6 +148,9 @@ static int parse_nl_config(struct genl_info *info,
xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
+ if (info->attrs[ILA_ATTR_CSUM_MODE])
+ xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
+
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -249,14 +253,9 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
if (!ila)
return -ENOMEM;
- ila->xp = *xp;
+ ila_init_saved_csum(&xp->ip);
- /* Precompute checksum difference for translation since we
- * know both the old identifier and the new one.
- */
- ila->xp.ip.csum_diff = compute_csum_diff8(
- (__be32 *)&xp->ip.locator_match,
- (__be32 *)&xp->ip.locator);
+ ila->xp = *xp;
order = ila_order(ila);
@@ -406,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
(__force u64)ila->xp.ip.locator.v64) ||
nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH,
(__force u64)ila->xp.ip.locator_match.v64) ||
- nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex))
+ nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
+ nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
return -1;
return 0;
--
2.8.0.rc2
^ permalink raw reply related
* Re: [PATCH net-next 12/12] net/mlx5e: Disable link up on INIT HCA command
From: Saeed Mahameed @ 2016-04-23 20:21 UTC (permalink / raw)
To: Ido Schimmel
Cc: Saeed Mahameed, David S. Miller, Linux Netdev List, Or Gerlitz,
Tal Alon, Eran Ben Elisha, Eugenia Emantayev, Jiri Pirko, eladr
In-Reply-To: <20160423160023.GA15013@colbert.Home>
On Sat, Apr 23, 2016 at 7:00 PM, Ido Schimmel <idosch@mellanox.com> wrote:
> mutt suggests I sent you the following patches a while ago:
>
> net/mlx5e: Set port administrative status in ndo_{open, close}
> net/mlx5e: Initialize port administrative status to down
>
> So I'm curious as to why you didn't follow up on them and instead came
> up with this patch.
>
> When I initially debugged this I pointed you to the fact that the
> firmware also needs to be patched, as setting the administrative status
> down via PAOS doesn't really do anything. The operational status will
> remain up. I just tested this patchset with the latest release firmware
> (12.14.2036) and I'm bumping into the same problem.
>
Hi Ido,
I do remember that patch, and back then i told you that your patch is
not cooked enough and that we were working on a solution:
1. you need to negotiate with the FW on if the FW can give up port
ownership to SW, you can find that in handle_hca_cap,
MLX5_CAP_GEN_MAX(dev, disable_link_up).
2. you need to configure the FW to give up port ownership via NVconfig
(mlxconfig tool) i think it is PORT_OWNER bit, Eran can elaborate
more if needed.
3.1 and 2 are required for mlx5_set_port_admin_status to take effect,
otherwise it is a NOOP.
Eran tested this patch and it works for him, maybe you missed step 2.
Step 2 will not be required in future FW (it will be the FW default).
Other than this patch, nothing more is required for driver to assume
ownership on port's link status.
> Until this is properly fixed the mlx5 driver is blacklisted in our test:
> https://github.com/jpirko/lnst/blob/master/recipes/switchdev/basic-001-links.py#L19
>
> Also, why is this directed at net-next?
>
it is kind of new behavior, and not a bug fix.
>> drivers/infiniband/hw/mlx5/main.c | 11 +++++++++++
>> drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++
>> drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 4 ++--
>> .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 ++--
>> drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++
>> drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++++
>> drivers/net/ethernet/mellanox/mlx5/core/port.c | 5 +++--
>> include/linux/mlx5/port.h | 3 ++-
>> 8 files changed, 33 insertions(+), 7 deletions(-)
>>
>
> [...]
>
>>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
>>index b2db180..f083797 100644
>>--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
>>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
>>@@ -202,12 +202,12 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
>>
>> mlx5_query_port_admin_status(mdev, &ps);
>> if (ps == MLX5_PORT_UP)
>>- mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
>>+ mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN, 1);
>
> Why is this needed? The link doesn't need to go through training when
> setting PFC. It's only needed for PAUSE frames when auto-negotiation is
> on, but you don't support that anyway.
>
> Thanks.
>
The code was there before this patch, only the function prototype was changed.
Maybe you have a valid point here, but this need to be fixed in a
different patch, it has nothing to do with this one.
Eran and Team please check this on Sunday, I found nothing re this
matter in PRM.
^ permalink raw reply
* Re: [PATCH net] net/mlx4_en: fix spurious timestamping callbacks
From: Or Gerlitz @ 2016-04-23 20:23 UTC (permalink / raw)
To: Eric Dumazet, David Miller; +Cc: netdev, Eugenia Emantayev, Eran Ben Elisha
In-Reply-To: <1461436546.7627.45.camel@edumazet-glaptop3.roam.corp.google.com>
On Sat, Apr 23, 2016 at 9:35 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> When multiple skb are TX-completed in a row, we might incorrectly keep
> a timestamp of a prior skb and cause extra work.
>
> Fixes: ec693d47010e8 ("net/mlx4_en: Add HW timestamping (TS) support")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Willem de Bruijn <willemb@google.com>
Eric, was that supposed to be Reported-by: or Signed-off-by: ...
FWIW, Dave, I wanted to use the opportunity and comment that this week
many of us gonna be on Passover vacation, so response rate should be
lower than usual
> drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> index c0d7b7296236..a386f047c1af 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
> @@ -405,7 +405,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
> u32 packets = 0;
> u32 bytes = 0;
> int factor = priv->cqe_factor;
> - u64 timestamp = 0;
> int done = 0;
> int budget = priv->tx_work_limit;
> u32 last_nr_txbb;
> @@ -445,9 +444,12 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
> new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
>
> do {
> + u64 timestamp = 0;
> +
> txbbs_skipped += last_nr_txbb;
> ring_index = (ring_index + last_nr_txbb) & size_mask;
> - if (ring->tx_info[ring_index].ts_requested)
> +
> + if (unlikely(ring->tx_info[ring_index].ts_requested))
> timestamp = mlx4_en_get_cqe_ts(cqe);
>
> /* free next descriptor */
>
>
^ permalink raw reply
* Re: [PATCH net-next 12/12] net/mlx5e: Disable link up on INIT HCA command
From: Or Gerlitz @ 2016-04-23 20:28 UTC (permalink / raw)
To: Saeed Mahameed
Cc: Ido Schimmel, Saeed Mahameed, David S. Miller, Linux Netdev List,
Or Gerlitz, Tal Alon, Eran Ben Elisha, Eugenia Emantayev,
Jiri Pirko, Elad Raz
In-Reply-To: <CALzJLG-AWFHnba=ysCAQLqCjFHo3L-sQTeW+NbpMg-X6PRtRPQ@mail.gmail.com>
On Sat, Apr 23, 2016 at 11:21 PM, Saeed Mahameed
<saeedm@dev.mellanox.co.il> wrote:
> On Sat, Apr 23, 2016 at 7:00 PM, Ido Schimmel <idosch@mellanox.com> wrote:
>> Also, why is this directed at net-next?
> it is kind of new behavior, and not a bug fix.
Saeed,
We should be aiming to patch the driver around this issue with
something that goes all the way back through -stable to the very 1st
upstream kernel supporting mlx5 EN --- the operational link doesn't go
down when instructed by the admin and this is an entry level
requirement from Ethernet of things (such as NIC and Switch ports).
^ permalink raw reply
* Re: [PATCH net-next 12/12] net/mlx5e: Disable link up on INIT HCA command
From: Saeed Mahameed @ 2016-04-23 21:24 UTC (permalink / raw)
To: Or Gerlitz
Cc: Ido Schimmel, Saeed Mahameed, David S. Miller, Linux Netdev List,
Or Gerlitz, Tal Alon, Eran Ben Elisha, Eugenia Emantayev,
Jiri Pirko, Elad Raz
In-Reply-To: <CAJ3xEMg_KFKRvCt5kBZzF_gmkLwg=2dZ=-FCjdbmgkX2-E0s3Q@mail.gmail.com>
On Sat, Apr 23, 2016 at 11:28 PM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
> On Sat, Apr 23, 2016 at 11:21 PM, Saeed Mahameed
> <saeedm@dev.mellanox.co.il> wrote:
>> On Sat, Apr 23, 2016 at 7:00 PM, Ido Schimmel <idosch@mellanox.com> wrote:
>
>>> Also, why is this directed at net-next?
>
>> it is kind of new behavior, and not a bug fix.
>
> Saeed,
>
> We should be aiming to patch the driver around this issue with
> something that goes all the way back through -stable to the very 1st
> upstream kernel supporting mlx5 EN --- the operational link doesn't go
> down when instructed by the admin and this is an entry level
> requirement from Ethernet of things (such as NIC and Switch ports).
Well, this feature requires updating mlx5_ifc, which was done in
earlier patch which added disable_link_up[0x1] bit,
I think it is doable, but it will introduce a conflict that Dave might
need to solve by not taking this bit from the patch I will send to net
if we decide to do so,
are you saying that i need to drop this patch and submit it to net ?
^ permalink raw reply
* Re: [PATCH net-next 12/12] net/mlx5e: Disable link up on INIT HCA command
From: Or Gerlitz @ 2016-04-23 21:28 UTC (permalink / raw)
To: Saeed Mahameed
Cc: Ido Schimmel, Saeed Mahameed, David S. Miller, Linux Netdev List,
Or Gerlitz, Tal Alon, Eran Ben Elisha, Eugenia Emantayev,
Jiri Pirko, Elad Raz
In-Reply-To: <CALzJLG8Jo5sk=CiHAUDF43RZDGQ1LKRbZftYDpzsO3QJsWx-uA@mail.gmail.com>
On Sun, Apr 24, 2016 at 12:24 AM, Saeed Mahameed
<saeedm@dev.mellanox.co.il> wrote:
> On Sat, Apr 23, 2016 at 11:28 PM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
>> On Sat, Apr 23, 2016 at 11:21 PM, Saeed Mahameed
>> <saeedm@dev.mellanox.co.il> wrote:
>>> On Sat, Apr 23, 2016 at 7:00 PM, Ido Schimmel <idosch@mellanox.com> wrote:
>>
>>>> Also, why is this directed at net-next?
>>
>>> it is kind of new behavior, and not a bug fix.
>>
>> Saeed,
>>
>> We should be aiming to patch the driver around this issue with
>> something that goes all the way back through -stable to the very 1st
>> upstream kernel supporting mlx5 EN --- the operational link doesn't go
>> down when instructed by the admin and this is an entry level
>> requirement from Ethernet of things (such as NIC and Switch ports).
>
> Well, this feature requires updating mlx5_ifc, which was done in
> earlier patch which added disable_link_up[0x1] bit,
> I think it is doable, but it will introduce a conflict that Dave might
> need to solve by not taking this bit from the patch I will send to net
> if we decide to do so,
> are you saying that i need to drop this patch and submit it to net ?
net-next is rebased over net and when needed conflicts are solved. Do
the right patch for net and we will take it from there. Please make
sure your patch/es meet the -stable criteria so they can be pushed
there as well.
Or.
^ permalink raw reply
* [PATCH net-next 0/2] net: ethernet: enc28j60: small improvements
From: Michael Heimpold @ 2016-04-23 21:43 UTC (permalink / raw)
To: Jonathan Cameron, Andrew F . Davis, Mark Brown, netdev,
Claudio Lanconelli
Cc: Michael Heimpold
This series of two patches adds the following improvements to the driver:
1) Rework the central SPI read function so that it is compatible with
SPI masters which only support half duplex transfers.
2) Add a device tree binding for the driver.
Michael Heimpold (2):
net: ethernet: enc28j60: support half-duplex SPI controllers
net: ethernet: enc28j60: add device tree support
.../devicetree/bindings/net/microchip-enc28j60.txt | 49 ++++++++++++++++++++++
drivers/net/ethernet/microchip/enc28j60.c | 27 +++++++++---
2 files changed, 70 insertions(+), 6 deletions(-)
create mode 100644 Documentation/devicetree/bindings/net/microchip-enc28j60.txt
--
2.5.0
^ permalink raw reply
* [PATCH net-next 1/2] net: ethernet: enc28j60: support half-duplex SPI controllers
From: Michael Heimpold @ 2016-04-23 21:43 UTC (permalink / raw)
To: Jonathan Cameron, Andrew F . Davis, Mark Brown, netdev,
Claudio Lanconelli
Cc: Michael Heimpold
In-Reply-To: <1461447800-11381-1-git-send-email-mhei@heimpold.de>
The current spi_read_buf function fails on SPI host masters which
are only half-duplex capable. Splitting the Tx and Rx part solves
this issue.
Tested on Raspberry Pi (full duplex) and I2SE Duckbill (half duplex).
Signed-off-by: Michael Heimpold <mhei@heimpold.de>
---
drivers/net/ethernet/microchip/enc28j60.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 86ea17e..b723622 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -89,22 +89,26 @@ spi_read_buf(struct enc28j60_net *priv, int len, u8 *data)
{
u8 *rx_buf = priv->spi_transfer_buf + 4;
u8 *tx_buf = priv->spi_transfer_buf;
- struct spi_transfer t = {
+ struct spi_transfer tx = {
.tx_buf = tx_buf,
+ .len = SPI_OPLEN,
+ };
+ struct spi_transfer rx = {
.rx_buf = rx_buf,
- .len = SPI_OPLEN + len,
+ .len = len,
};
struct spi_message msg;
int ret;
tx_buf[0] = ENC28J60_READ_BUF_MEM;
- tx_buf[1] = tx_buf[2] = tx_buf[3] = 0; /* don't care */
spi_message_init(&msg);
- spi_message_add_tail(&t, &msg);
+ spi_message_add_tail(&tx, &msg);
+ spi_message_add_tail(&rx, &msg);
+
ret = spi_sync(priv->spi, &msg);
if (ret == 0) {
- memcpy(data, &rx_buf[SPI_OPLEN], len);
+ memcpy(data, rx_buf, len);
ret = msg.status;
}
if (ret && netif_msg_drv(priv))
--
2.5.0
^ permalink raw reply related
* [PATCH net-next 2/2] net: ethernet: enc28j60: add device tree support
From: Michael Heimpold @ 2016-04-23 21:43 UTC (permalink / raw)
To: Jonathan Cameron, Andrew F . Davis, Mark Brown, netdev,
devicetree, Rob Herring, Pawel Moll, Mark Rutland, Ian Campbell,
Kumar Gala, Claudio Lanconelli
Cc: Michael Heimpold
In-Reply-To: <1461447800-11381-1-git-send-email-mhei@heimpold.de>
The following patch adds the required match table for device tree support
(and while at, fix the indent).
Also add the corresponding binding documentation file.
Signed-off-by: Michael Heimpold <mhei@heimpold.de>
---
.../devicetree/bindings/net/microchip-enc28j60.txt | 49 ++++++++++++++++++++++
drivers/net/ethernet/microchip/enc28j60.c | 13 +++++-
2 files changed, 61 insertions(+), 1 deletion(-)
create mode 100644 Documentation/devicetree/bindings/net/microchip-enc28j60.txt
diff --git a/Documentation/devicetree/bindings/net/microchip-enc28j60.txt b/Documentation/devicetree/bindings/net/microchip-enc28j60.txt
new file mode 100644
index 0000000..c472427
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/microchip-enc28j60.txt
@@ -0,0 +1,49 @@
+* Microchip ENC28J60
+
+This is a standalone 10 MBit ethernet controller with SPI interface.
+
+For each device connected to a SPI bus, define a child node within
+the SPI master node.
+
+Required properties:
+- compatible: Should be "microchip,enc28j60"
+- reg: Specify the SPI chip select the ENC28J60 is wired to
+- interrupts: Specify the interrupt and interrupt type (usually falling edge)
+
+Optional properties:
+- interrupt-parent: Specify the pHandle of the source interrupt
+- spi-max-frequency: Maximum frequency of the SPI bus when accessing the ENC28J60.
+ According to the ENC28J80 datasheet, the chip allows a maximum of 20 MHz, however,
+ board designs may need to limit this value.
+
+
+Example (for NXP i.MX28 with pin control stuff for GPIO irq):
+
+ ssp2: ssp@80014000 {
+ compatible = "fsl,imx28-spi";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2_pins_b &spi2_sck_cfg>;
+ status = "okay";
+
+ enc28j60: ethernet@0 {
+ compatible = "microchip,enc28j60";
+ pinctrl-names = "default";
+ pinctrl-0 = <&enc28j60_pins>;
+ reg = <0>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <3 IRQ_TYPE_EDGE_FALLING>;
+ spi-max-frequency = <12000000>;
+ };
+ };
+
+ pinctrl@80018000 {
+ enc28j60_pins: enc28j60_pins@0 {
+ reg = <0>;
+ fsl,pinmux-ids = <
+ MX28_PAD_AUART0_RTS__GPIO_3_3 /* Interrupt */
+ >;
+ fsl,drive-strength = <MXS_DRIVE_4mA>;
+ fsl,voltage = <MXS_VOLTAGE_HIGH>;
+ fsl,pull-up = <MXS_PULL_DISABLE>;
+ };
+ };
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index b723622..49635d7 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1634,9 +1634,20 @@ static int enc28j60_remove(struct spi_device *spi)
return 0;
}
+#ifdef CONFIG_OF
+static const struct of_device_id enc28j60_dt_ids[] = {
+ { .compatible = "microchip,enc28j60" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, enc28j60_dt_ids);
+#else
+#define enc28j60_dt_ids NULL
+#endif
+
static struct spi_driver enc28j60_driver = {
.driver = {
- .name = DRV_NAME,
+ .name = DRV_NAME,
+ .of_match_table = enc28j60_dt_ids,
},
.probe = enc28j60_probe,
.remove = enc28j60_remove,
--
2.5.0
^ permalink raw reply related
* [PATCH net-next] macvlan: fix failure during registration v3
From: Francesco Ruggeri @ 2016-04-23 22:03 UTC (permalink / raw)
To: netdev
Cc: Francesco Ruggeri, Eric W. Biederman, David S. Miller,
Mahesh Bandewar
If macvlan_common_newlink fails in register_netdevice after macvlan_init
then it decrements port->count twice, first in macvlan_uninit (from
register_netdevice or rollback_registered) and then again in
macvlan_common_newlink.
A similar problem may exist in the ipvlan driver.
This patch consolidates modifications to port->count into macvlan_init
and macvlan_uninit (thanks to Eric Biederman for suggesting this approach).
v3: remove macvtap specific bits.
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
---
drivers/net/macvlan.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 2bcf1f3..cb01023 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -795,6 +795,7 @@ static int macvlan_init(struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
const struct net_device *lowerdev = vlan->lowerdev;
+ struct macvlan_port *port = vlan->port;
dev->state = (dev->state & ~MACVLAN_STATE_MASK) |
(lowerdev->state & MACVLAN_STATE_MASK);
@@ -812,6 +813,8 @@ static int macvlan_init(struct net_device *dev)
if (!vlan->pcpu_stats)
return -ENOMEM;
+ port->count += 1;
+
return 0;
}
@@ -1312,10 +1315,9 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- port->count += 1;
err = register_netdevice(dev);
if (err < 0)
- goto destroy_port;
+ return err;
dev->priv_flags |= IFF_MACVLAN;
err = netdev_upper_dev_link(lowerdev, dev);
@@ -1330,10 +1332,6 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
unregister_netdev:
unregister_netdevice(dev);
-destroy_port:
- port->count -= 1;
- if (!port->count)
- macvlan_port_destroy(lowerdev);
return err;
}
--
1.8.1.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox