* [RFC v1 15/19] RDMA/irdma: Add miscellaneous utility definitions
From: Shiraz Saleem @ 2019-02-15 17:11 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: Mustafa Ismail <mustafa.ismail@intel.com>
Add miscellaneous utility functions and headers.
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/hw/irdma/osdep.h | 153 ++
drivers/infiniband/hw/irdma/protos.h | 118 ++
drivers/infiniband/hw/irdma/status.h | 70 +
drivers/infiniband/hw/irdma/utils.c | 2565 ++++++++++++++++++++++++++++++++++
4 files changed, 2906 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/osdep.h
create mode 100644 drivers/infiniband/hw/irdma/protos.h
create mode 100644 drivers/infiniband/hw/irdma/status.h
create mode 100644 drivers/infiniband/hw/irdma/utils.c
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
new file mode 100644
index 0000000..ade5536
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_OSDEP_H
+#define IRDMA_OSDEP_H
+
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <linux/pci.h>
+#include <net/tcp.h>
+#include <crypto/hash.h>
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+/* NOTE(review): presumably the HW stats timer period in ms - confirm at the user */
+#define STATS_TIMER_DELAY	60000
+/* Shift mask @m left by @s bits */
+#define MAKEMASK(m, s)		((m) << (s))
+
+/*
+ * Resolve the struct device behind @ptr. @ptr must have an ->hw member whose
+ * dev_context field holds a struct pci_dev pointer.
+ */
+#define to_device(ptr) \
+	(&((struct pci_dev *)((ptr)->hw->dev_context))->dev)
+
+/* pr_*() wrappers that prefix every message with the calling function name */
+#define irdma_pr_err(fmt, ...) \
+	pr_err("%s: " fmt, __func__, ##__VA_ARGS__)
+
+#define irdma_pr_info(fmt, ...) \
+	pr_info("%s: " fmt, __func__, ##__VA_ARGS__)
+
+#define irdma_pr_warn(fmt, ...) \
+	pr_warn("%s: " fmt, __func__, ##__VA_ARGS__)
+
+/* dev_*() wrappers; @dev is anything accepted by to_device() */
+#define irdma_dev_err(dev, fmt, ...) \
+	dev_err(to_device(dev), "%s: " fmt, __func__, ##__VA_ARGS__)
+
+#define irdma_dev_info(dev, fmt, ...) \
+	dev_info(to_device(dev), "%s: " fmt, __func__, ##__VA_ARGS__)
+
+#define irdma_dev_warn(dev, fmt, ...) \
+	dev_warn(to_device(dev), "%s: " fmt, __func__, ##__VA_ARGS__)
+
+/*
+ * Debug output helpers.
+ *
+ * With CONFIG_DYNAMIC_DEBUG the second argument is pasted into the format
+ * string and the message goes through dev_dbg()/print_hex_dump_debug().
+ * Without it, the second argument is a bit mask tested against
+ * (dev)->debug_mask and enabled messages are printed unconditionally.
+ *
+ * NOTE(review): the two variants expect different kinds of second argument
+ * (string literal vs. integer mask) - confirm what callers pass.
+ */
+#ifdef CONFIG_DYNAMIC_DEBUG
+#define irdma_debug(dev, prefix, ...) \
+	dev_dbg(to_device(dev), prefix ": " __VA_ARGS__)
+#define irdma_debug_buf(dev, prefix, desc, buf, size) \
+	print_hex_dump_debug(prefix ": " desc " ", \
+			     DUMP_PREFIX_OFFSET, \
+			     16, 8, buf, size, false)
+#else
+#define irdma_debug(dev, mask, fmt, ...) \
+do { \
+	if (((mask) & (dev)->debug_mask)) \
+		dev_info(to_device(dev), \
+			 "%s: " fmt, __func__, \
+			 ##__VA_ARGS__); \
+} while (0)
+/*
+ * print_hex_dump_debug() is a no-op when neither CONFIG_DYNAMIC_DEBUG nor
+ * DEBUG is defined, so call print_hex_dump() directly; otherwise a dump
+ * enabled through debug_mask would never be printed. KERN_INFO matches the
+ * dev_info() level used by irdma_debug() above.
+ */
+#define irdma_debug_buf(dev, mask, desc, buf, size) \
+do { \
+	if (((mask) & (dev)->debug_mask)) \
+		print_hex_dump(KERN_INFO, desc " ", \
+			       DUMP_PREFIX_OFFSET, \
+			       16, 8, buf, size, false); \
+} while (0)
+#endif
+
+/* Holder for a pointer to DMA addresses (presumably an array - confirm at the users) */
+struct irdma_dma_info {
+ dma_addr_t *dmaaddrs;
+};
+
+/*
+ * Descriptor of a coherent DMA allocation, filled by irdma_allocate_dma_mem()
+ * and released by irdma_free_dma_mem().
+ */
+struct irdma_dma_mem {
+ void *va; /* CPU virtual address; NULL after free */
+ dma_addr_t pa; /* DMA address of the allocation */
+ u32 size; /* allocation size after alignment */
+} __packed;
+
+/*
+ * Descriptor of a kzalloc()'d buffer, filled by irdma_allocate_virt_mem()
+ * and released by irdma_free_virt_mem().
+ */
+struct irdma_virt_mem {
+ void *va; /* buffer address */
+ u32 size; /* requested size */
+} __packed;
+
+struct irdma_sc_vsi;
+struct irdma_sc_dev;
+struct irdma_sc_qp;
+struct irdma_puda_buf;
+struct irdma_puda_cmpl_info;
+struct irdma_update_sds_info;
+struct irdma_hmc_fcn_info;
+struct irdma_virtchnl_work_info;
+struct irdma_manage_vf_pble_info;
+struct irdma_hw;
+struct irdma_pci_f;
+
+u8 __iomem *irdma_get_hw_addr(void *dev);
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
+bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
+void irdma_add_dev_ref(struct irdma_sc_dev *dev);
+void irdma_put_dev_ref(struct irdma_sc_dev *dev);
+enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc,
+ void *addr,
+ u32 len, u32 val);
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf);
+void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf,
+ u16 len,
+ u32 seqnum);
+void irdma_free_hash_desc(struct shash_desc *hash_desc);
+enum irdma_status_code irdma_init_hash_desc(struct shash_desc **hash_desc);
+enum irdma_status_code
+irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf);
+enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+enum irdma_status_code
+irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_fcn_info *hmcfcninfo);
+enum irdma_status_code
+irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem,
+ u8 hmc_fn_id);
+enum irdma_status_code
+irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem,
+ u8 hmc_fn_id);
+enum irdma_status_code irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *mem);
+enum irdma_status_code
+irdma_cqp_manage_vf_pble_bp(struct irdma_sc_dev *dev,
+ struct irdma_manage_vf_pble_info *info);
+void irdma_cqp_spawn_worker(struct irdma_sc_dev *dev,
+ struct irdma_virtchnl_work_info *work_info,
+ u32 iw_vf_idx);
+void *irdma_remove_head(struct list_head *list);
+enum irdma_status_code
+irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend);
+void irdma_term_modify_qp(struct irdma_sc_qp *qp,
+ u8 next_state,
+ u8 term,
+ u8 term_len);
+void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred);
+void irdma_terminate_start_timer(struct irdma_sc_qp *qp);
+void irdma_terminate_del_timer(struct irdma_sc_qp *qp);
+enum irdma_status_code
+irdma_hw_manage_vf_pble_bp(struct irdma_pci_f *rf,
+ struct irdma_manage_vf_pble_info *info,
+ bool wait);
+void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi);
+void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi);
+void wr32(struct irdma_hw *hw, u32 reg, u32 val);
+u32 rd32(struct irdma_hw *hw, u32 reg);
+u64 rd64(struct irdma_hw *hw, u32 reg);
+#endif /* IRDMA_OSDEP_H */
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
new file mode 100644
index 0000000..d65004b
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_PROTOS_H
+#define IRDMA_PROTOS_H
+
+/*
+ * NOTE(review): the first five values are not used in the visible part of
+ * this patch; their meaning is only suggested by their names - confirm at
+ * the users.
+ */
+#define PAUSE_TIMER_VAL 0xffff
+#define REFRESH_THRESHOLD 0x7fff
+#define HIGH_THRESHOLD 0x800
+#define LOW_THRESHOLD 0x200
+#define ALL_TC2PFC 0xff
+/* Length, in ms, of each wait for a CQP completion in irdma_wait_event() */
+#define CQP_COMPL_WAIT_TIME_MS 10
+/* irdma_wait_event() reports a timeout once cqp_timeout.count reaches this */
+#define CQP_TIMEOUT_THRESHOLD 500
+
+/**
+ * irdma_insert_wqe_hdr - write wqe header
+ * @wqe: cqp wqe for header
+ * @hdr: header for the cqp wqe
+ *
+ * Issues a write barrier and then stores @hdr into @wqe via
+ * set_64bit_val() with offset 24, so that the rest of the WQE is written
+ * before the header.
+ */
+static inline void irdma_insert_wqe_hdr(__le64 *wqe, u64 hdr)
+{
+ wmb(); /* make sure WQE is populated before polarity is set */
+ set_64bit_val(wqe, 24, hdr);
+}
+
+/* init operations */
+enum irdma_status_code irdma_sc_ctrl_init(enum irdma_vers ver,
+ struct irdma_sc_dev *dev,
+ struct irdma_device_init_info *info);
+void irdma_sc_rt_init(struct irdma_sc_dev *dev);
+void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp);
+__le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch);
+enum irdma_status_code irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info,
+ bool post_sq);
+/* HMC/FPM functions */
+enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev,
+ u8 hmc_fn_id);
+/* stats misc */
+enum irdma_status_code irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat,
+ bool wait);
+enum irdma_status_code irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev,
+ u8 cmd,
+ struct irdma_ws_node_info *node_info);
+enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev,
+ u8 cmd,
+ struct irdma_up_info *map_info);
+enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev,
+ struct irdma_sc_ceq *sc_ceq,
+ u8 op);
+enum irdma_status_code
+irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi,
+ u8 cmd,
+ struct irdma_stats_inst_info *stats_info);
+u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev);
+void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id);
+void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats,
+ struct irdma_gather_stats *gather_stats,
+ struct irdma_gather_stats *last_gather_stats);
+/* vsi functions */
+enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_stats_info *info);
+void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi);
+void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_init_info *info);
+enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
+void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
+/* misc L2 param change functions */
+void irdma_change_l2params(struct irdma_sc_vsi *vsi,
+			   struct irdma_l2params *l2params);
+void irdma_suspend_qps(struct irdma_sc_vsi *vsi);
+void irdma_qp_add_qos(struct irdma_sc_qp *qp);
+void irdma_qp_rem_qos(struct irdma_sc_qp *qp);
+struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head,
+					   struct irdma_sc_qp *qp);
+void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi);
+/*
+ * irdma_alloc_ws_node_id() and irdma_free_ws_node_id() are already declared
+ * with the stats/misc prototypes earlier in this header; the redundant second
+ * declarations that used to sit here were dropped.
+ */
+enum irdma_status_code irdma_lan_register_qset(struct irdma_sc_vsi *vsi,
+					       struct irdma_ws_node *tc_node);
+void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
+			       struct irdma_ws_node *tc_node);
+/* terminate functions*/
+void irdma_terminate_send_fin(struct irdma_sc_qp *qp);
+
+void irdma_terminate_connection(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info);
+
+void irdma_terminate_received(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info);
+/* dynamic memory allocation */
+enum irdma_status_code irdma_allocate_dma_mem(struct irdma_hw *hw,
+ struct irdma_dma_mem *mem,
+ u64 size,
+ u32 alignment);
+void irdma_free_dma_mem(struct irdma_hw *hw, struct irdma_dma_mem *mem);
+enum irdma_status_code irdma_allocate_virt_mem(struct irdma_hw *hw,
+ struct irdma_virt_mem *mem,
+ u32 size);
+enum irdma_status_code irdma_free_virt_mem(struct irdma_hw *hw,
+ struct irdma_virt_mem *mem);
+/* misc */
+u8 irdma_get_encoded_wqe_size(u32 wqsize, bool cqpsq);
+void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp);
+enum irdma_status_code irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp,
+ u64 scratch,
+ u8 hmc_fn_id,
+ bool post_sq,
+ bool poll_registers);
+enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev,
+ u32 qp_count);
+enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev);
+void free_sd_mem(struct irdma_sc_dev *dev);
+enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo);
+enum irdma_status_code irdma_process_bh(struct irdma_sc_dev *dev);
+#endif /* IRDMA_PROTOS_H */
diff --git a/drivers/infiniband/hw/irdma/status.h b/drivers/infiniband/hw/irdma/status.h
new file mode 100644
index 0000000..a23e42c
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/status.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_STATUS_H
+#define IRDMA_STATUS_H
+
+/*
+ * Error Codes
+ *
+ * Driver-internal status values: IRDMA_SUCCESS is 0 and every failure code is
+ * negative. The numbering is not contiguous (-3, -41, -42 and -52..-60 are
+ * not assigned here).
+ *
+ * NOTE(review): IRDMA_NOT_SUPPORTED lacks the ERR_ infix and
+ * IRDMA_ERR_list_empty is lower-case, unlike the other enumerators; renaming
+ * would require updating every user.
+ */
+enum irdma_status_code {
+ IRDMA_SUCCESS = 0,
+ IRDMA_ERR_NVM = -1,
+ IRDMA_ERR_NVM_CHECKSUM = -2,
+ IRDMA_ERR_CFG = -4,
+ IRDMA_ERR_PARAM = -5,
+ IRDMA_ERR_DEVICE_NOT_SUPPORTED = -6,
+ IRDMA_ERR_RESET_FAILED = -7,
+ IRDMA_ERR_SWFW_SYNC = -8,
+ IRDMA_ERR_NO_MEMORY = -9,
+ IRDMA_ERR_BAD_PTR = -10,
+ IRDMA_ERR_INVALID_PD_ID = -11,
+ IRDMA_ERR_INVALID_QP_ID = -12,
+ IRDMA_ERR_INVALID_CQ_ID = -13,
+ IRDMA_ERR_INVALID_CEQ_ID = -14,
+ IRDMA_ERR_INVALID_AEQ_ID = -15,
+ IRDMA_ERR_INVALID_SIZE = -16,
+ IRDMA_ERR_INVALID_ARP_INDEX = -17,
+ IRDMA_ERR_INVALID_FPM_FUNC_ID = -18,
+ IRDMA_ERR_QP_INVALID_MSG_SIZE = -19,
+ IRDMA_ERR_QP_TOOMANY_WRS_POSTED = -20,
+ IRDMA_ERR_INVALID_FRAG_COUNT = -21,
+ IRDMA_ERR_Q_EMPTY = -22,
+ IRDMA_ERR_INVALID_ALIGNMENT = -23,
+ IRDMA_ERR_FLUSHED_Q = -24,
+ IRDMA_ERR_INVALID_PUSH_PAGE_INDEX = -25,
+ IRDMA_ERR_INVALID_INLINE_DATA_SIZE = -26,
+ IRDMA_ERR_TIMEOUT = -27,
+ IRDMA_ERR_OPCODE_MISMATCH = -28,
+ IRDMA_ERR_CQP_COMPL_ERROR = -29,
+ IRDMA_ERR_INVALID_VF_ID = -30,
+ IRDMA_ERR_INVALID_HMCFN_ID = -31,
+ IRDMA_ERR_BACKING_PAGE_ERROR = -32,
+ IRDMA_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
+ IRDMA_ERR_INVALID_PBLE_INDEX = -34,
+ IRDMA_ERR_INVALID_SD_INDEX = -35,
+ IRDMA_ERR_INVALID_PAGE_DESC_INDEX = -36,
+ IRDMA_ERR_INVALID_SD_TYPE = -37,
+ IRDMA_ERR_MEMCPY_FAILED = -38,
+ IRDMA_ERR_INVALID_HMC_OBJ_INDEX = -39,
+ IRDMA_ERR_INVALID_HMC_OBJ_COUNT = -40,
+ IRDMA_ERR_BUF_TOO_SHORT = -43,
+ IRDMA_ERR_BAD_IWARP_CQE = -44,
+ IRDMA_ERR_NVM_BLANK_MODE = -45,
+ IRDMA_ERR_NOT_IMPL = -46,
+ IRDMA_ERR_PE_DOORBELL_NOT_ENA = -47,
+ IRDMA_ERR_NOT_READY = -48,
+ IRDMA_NOT_SUPPORTED = -49,
+ IRDMA_ERR_FIRMWARE_API_VER = -50,
+ IRDMA_ERR_RING_FULL = -51,
+ IRDMA_ERR_MPA_CRC = -61,
+ IRDMA_ERR_NO_TXBUFS = -62,
+ IRDMA_ERR_SEQ_NUM = -63,
+ IRDMA_ERR_list_empty = -64,
+ IRDMA_ERR_INVALID_MAC_ADDR = -65,
+ IRDMA_ERR_BAD_STAG = -66,
+ IRDMA_ERR_CQ_COMPL_ERROR = -67,
+ IRDMA_ERR_Q_DESTROYED = -68,
+ IRDMA_ERR_INVALID_FEAT_CNT = -69,
+ IRDMA_ERR_REG_CQ_FULL = -70,
+ IRDMA_ERR_VF_MSG_ERROR = -71,
+};
+#endif /* IRDMA_STATUS_H */
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
new file mode 100644
index 0000000..b2f8788
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -0,0 +1,2565 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/mii.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <net/neighbour.h>
+#include "main.h"
+
+/**
+ * irdma_arp_table - look up, add or delete a driver ARP table entry
+ * @rf: RDMA PCI function
+ * @ip_addr: ip address to operate on (one u32 for IPv4, four for IPv6)
+ * @ipv4: true when @ip_addr holds an IPv4 address
+ * @mac_addr: mac address ptr, only read for IRDMA_ARP_ADD
+ * @action: IRDMA_ARP_ADD, IRDMA_ARP_RESOLVE or IRDMA_ARP_DELETE
+ *
+ * Return: index of the affected entry on success. -1 if the entry already
+ * exists (add), is not found (resolve/delete) or @action is unknown. An add
+ * may also return the error reported by irdma_alloc_rsrc().
+ */
+int irdma_arp_table(struct irdma_pci_f *rf,
+		    u32 *ip_addr,
+		    bool ipv4,
+		    u8 *mac_addr,
+		    u32 action)
+{
+	u32 key[4] = {};
+	int idx = 0;
+	int ret;
+
+	/* IPv4 only fills the first word; the remaining words stay zero */
+	if (!ipv4)
+		memcpy(key, ip_addr, sizeof(key));
+	else
+		key[0] = *ip_addr;
+
+	/* idx ends up equal to arp_table_size when nothing matches */
+	while ((u32)idx < rf->arp_table_size &&
+	       memcmp(rf->arp_table[idx].ip_addr, key, sizeof(key)))
+		idx++;
+
+	if (action == IRDMA_ARP_ADD) {
+		/* refuse to add an address that is already present */
+		if (idx != rf->arp_table_size)
+			return -1;
+
+		idx = 0;
+		ret = irdma_alloc_rsrc(rf, rf->allocated_arps,
+				       rf->arp_table_size,
+				       (u32 *)&idx,
+				       &rf->next_arp_index);
+		if (ret)
+			return ret;
+
+		memcpy(rf->arp_table[idx].ip_addr, key,
+		       sizeof(rf->arp_table[idx].ip_addr));
+		ether_addr_copy(rf->arp_table[idx].mac_addr, mac_addr);
+		return idx;
+	}
+
+	if (action != IRDMA_ARP_RESOLVE && action != IRDMA_ARP_DELETE)
+		return -1;
+
+	/* resolve and delete both need an existing entry */
+	if (idx == rf->arp_table_size)
+		return -1;
+
+	if (action == IRDMA_ARP_DELETE) {
+		memset(rf->arp_table[idx].ip_addr, 0,
+		       sizeof(rf->arp_table[idx].ip_addr));
+		eth_zero_addr(rf->arp_table[idx].mac_addr);
+		irdma_free_rsrc(rf, rf->allocated_arps, idx);
+	}
+
+	return idx;
+}
+
+/**
+ * irdma_add_arp - add a new arp entry if needed
+ * @rf: RDMA function
+ * @ip: IP address
+ * @ipv4: true when @ip holds an IPv4 address
+ * @mac: MAC address
+ *
+ * If @ip is already present with the same MAC, its index is returned as is.
+ * If it is present with a different MAC, the stale entry is deleted first.
+ *
+ * Return: result of resolving @ip in the ARP table after the update.
+ */
+int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac)
+{
+	int cur = irdma_arp_table(rf, ip, ipv4, NULL, IRDMA_ARP_RESOLVE);
+
+	if (cur >= 0) {
+		u8 *cur_mac = rf->arp_table[cur].mac_addr;
+
+		if (ether_addr_equal(cur_mac, mac))
+			return cur;
+
+		/* same IP, different MAC: drop the stale mapping */
+		irdma_manage_arp_cache(rf, cur_mac, ip, ipv4,
+				       IRDMA_ARP_DELETE);
+	}
+
+	irdma_manage_arp_cache(rf, mac, ip, ipv4, IRDMA_ARP_ADD);
+
+	return irdma_arp_table(rf, ip, ipv4, NULL, IRDMA_ARP_RESOLVE);
+}
+
+/**
+ * wr32 - write 32 bits to hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ * @val: value to write to register
+ */
+inline void wr32(struct irdma_hw *hw, u32 reg, u32 val)
+{
+	u8 __iomem *addr = hw->hw_addr + reg;
+
+	writel(val, addr);
+}
+
+/**
+ * rd32 - read a 32 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return: value read from the register at @reg
+ */
+inline u32 rd32(struct irdma_hw *hw, u32 reg)
+{
+	u8 __iomem *addr = hw->hw_addr + reg;
+
+	return readl(addr);
+}
+
+/**
+ * rd64 - read a 64 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return: value read from the register at @reg
+ */
+inline u64 rd64(struct irdma_hw *hw, u32 reg)
+{
+	u8 __iomem *addr = hw->hw_addr + reg;
+
+	return readq(addr);
+}
+
+/**
+ * irdma_inetaddr_event - system notifier for ipv4 addr events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ *
+ * Updates the driver ARP table and notifies the connection manager when an
+ * IPv4 address on the device's netdev goes up, down or changes. When the
+ * netdev has a master upper device, the first address of that upper device
+ * is used instead of the address carried by the event.
+ *
+ * Return: always NOTIFY_DONE.
+ */
+int irdma_inetaddr_event(struct notifier_block *notifier,
+			 unsigned long event,
+			 void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct net_device *event_netdev = ifa->ifa_dev->dev;
+	struct net_device *netdev;
+	struct net_device *upper_dev;
+	struct irdma_device *iwdev;
+	u32 local_ipaddr;
+
+	iwdev = irdma_find_netdev(event_netdev);
+	if (!iwdev)
+		return NOTIFY_DONE;
+
+	if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
+		return NOTIFY_DONE;
+
+	netdev = iwdev->netdev;
+	if (netdev != event_netdev)
+		return NOTIFY_DONE;
+
+	upper_dev = netdev_master_upper_dev_get(netdev);
+	if (upper_dev) {
+		struct in_device *in;
+		struct in_ifaddr *upper_ifa;
+
+		rcu_read_lock();
+		in = __in_dev_get_rcu(upper_dev);
+		/*
+		 * The upper device may have no IPv4 state or no address
+		 * configured (e.g. after its last address was removed); there
+		 * is then nothing to add or delete.
+		 */
+		upper_ifa = in ? in->ifa_list : NULL;
+		if (!upper_ifa) {
+			rcu_read_unlock();
+			return NOTIFY_DONE;
+		}
+		local_ipaddr = ntohl(upper_ifa->ifa_address);
+		rcu_read_unlock();
+	} else {
+		local_ipaddr = ntohl(ifa->ifa_address);
+	}
+
+	switch (event) {
+	case NETDEV_DOWN:
+		irdma_manage_arp_cache(iwdev->rf,
+				       netdev->dev_addr,
+				       &local_ipaddr,
+				       true,
+				       IRDMA_ARP_DELETE);
+		irdma_if_notify(iwdev, netdev, &local_ipaddr, true, false);
+		break;
+	case NETDEV_UP:
+		/* Fall through */
+	case NETDEV_CHANGEADDR:
+		irdma_add_arp(iwdev->rf, &local_ipaddr, true, netdev->dev_addr);
+		irdma_if_notify(iwdev, netdev, &local_ipaddr, true, true);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * irdma_inet6addr_event - system notifier for ipv6 addr events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ *
+ * Updates the driver ARP table and notifies the connection manager when an
+ * IPv6 address on the device's netdev goes up, down or changes.
+ *
+ * Return: always NOTIFY_DONE.
+ */
+int irdma_inet6addr_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *ptr)
+{
+	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+	struct net_device *evt_dev = ifa->idev->dev;
+	struct irdma_device *iwdev = irdma_find_netdev(evt_dev);
+	struct net_device *netdev;
+	u32 ip6[4];
+
+	if (!iwdev)
+		return NOTIFY_DONE;
+
+	if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
+		return NOTIFY_DONE;
+
+	/* only react to events on the netdev bound to this device */
+	netdev = iwdev->netdev;
+	if (netdev != evt_dev)
+		return NOTIFY_DONE;
+
+	irdma_copy_ip_ntohl(ip6, ifa->addr.in6_u.u6_addr32);
+
+	if (event == NETDEV_DOWN) {
+		irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, ip6,
+				       false, IRDMA_ARP_DELETE);
+		irdma_if_notify(iwdev, netdev, ip6, false, false);
+	} else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR) {
+		irdma_add_arp(iwdev->rf, ip6, false, netdev->dev_addr);
+		irdma_if_notify(iwdev, netdev, ip6, false, true);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * irdma_net_event - system notifier for net events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: neighbor
+ *
+ * On a neighbour update, adds the neighbour to the driver ARP table when its
+ * state is valid and deletes it otherwise. The neighbour table's address
+ * family decides whether the key is handled as IPv4 or IPv6; neighbours of
+ * any other family are ignored.
+ *
+ * Return: always NOTIFY_DONE.
+ */
+int irdma_net_event(struct notifier_block *notifier,
+		    unsigned long event,
+		    void *ptr)
+{
+	struct neighbour *neigh = ptr;
+	struct irdma_device *iwdev;
+	__be32 *p;
+	u32 local_ipaddr[4] = {};
+	bool ipv4;
+
+	switch (event) {
+	case NETEVENT_NEIGH_UPDATE:
+		iwdev = irdma_find_netdev((struct net_device *)neigh->dev);
+		if (!iwdev)
+			return NOTIFY_DONE;
+
+		if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
+			return NOTIFY_DONE;
+
+		p = (__be32 *)neigh->primary_key;
+		if (neigh->tbl->family == AF_INET6) {
+			ipv4 = false;
+			irdma_copy_ip_ntohl(local_ipaddr, p);
+		} else if (neigh->tbl->family == AF_INET) {
+			/*
+			 * An IPv4 key is a single 32-bit word; copying four
+			 * words would read past the key and build a bogus
+			 * IPv6-style address.
+			 */
+			ipv4 = true;
+			local_ipaddr[0] = ntohl(*p);
+		} else {
+			return NOTIFY_DONE;
+		}
+
+		if (neigh->nud_state & NUD_VALID)
+			irdma_add_arp(iwdev->rf, local_ipaddr, ipv4, neigh->ha);
+		else
+			irdma_manage_arp_cache(iwdev->rf,
+					       neigh->ha,
+					       local_ipaddr,
+					       ipv4,
+					       IRDMA_ARP_DELETE);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * irdma_netdevice_event - system notifier for netdev events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: netdev
+ *
+ * Netdevs not yet known to the driver are handed to irdma_handle_netdev().
+ * For a known, registered device, iw_status is updated and a port event is
+ * raised on NETDEV_UP and NETDEV_DOWN.
+ *
+ * Return: always NOTIFY_DONE.
+ */
+int irdma_netdevice_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *ptr)
+{
+	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
+	struct irdma_device *iwdev = irdma_find_netdev(event_netdev);
+
+	if (!iwdev) {
+		irdma_handle_netdev(event_netdev);
+		return NOTIFY_DONE;
+	}
+
+	if (iwdev->init_state < RDMA_DEV_REGISTERED || iwdev->closing)
+		return NOTIFY_DONE;
+
+	if ((struct net_device *)iwdev->netdev != event_netdev)
+		return NOTIFY_DONE;
+
+	/* any event on our netdev marks the port up, except NETDEV_DOWN */
+	iwdev->iw_status = 1;
+	if (event == NETDEV_DOWN)
+		iwdev->iw_status = 0;
+
+	if (event == NETDEV_DOWN || event == NETDEV_UP)
+		irdma_port_ibevent(iwdev);
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * irdma_get_cqp_request - get cqp struct
+ * @cqp: device cqp ptr
+ * @wait: cqp to be used in wait mode
+ *
+ * Takes a request from the list of available requests, or allocates one
+ * with GFP_ATOMIC (marked dynamic) when the list is empty. The reference
+ * count starts at 2 for a waiting request and at 1 otherwise.
+ *
+ * Return: the request, or NULL if none was available and allocation failed.
+ */
+struct irdma_cqp_request *irdma_get_cqp_request(struct irdma_cqp *cqp,
+						bool wait)
+{
+	struct irdma_cqp_request *req;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cqp->req_lock, flags);
+	req = list_first_entry_or_null(&cqp->cqp_avail_reqs,
+				       struct irdma_cqp_request, list);
+	if (req)
+		list_del_init(&req->list);
+	spin_unlock_irqrestore(&cqp->req_lock, flags);
+
+	if (!req) {
+		/* pool exhausted: fall back to a one-off allocation */
+		req = kzalloc(sizeof(*req), GFP_ATOMIC);
+		if (!req) {
+			irdma_debug(cqp->sc_cqp.dev, IRDMA_DEBUG_ERR,
+				    "CQP Request Fail: No Memory");
+			return NULL;
+		}
+		req->dynamic = true;
+		if (wait)
+			init_waitqueue_head(&req->waitq);
+	}
+
+	atomic_set(&req->refcount, wait ? 2 : 1);
+	if (wait)
+		req->waiting = true;
+	memset(&req->compl_info, 0, sizeof(req->compl_info));
+
+	return req;
+}
+
+/**
+ * irdma_free_cqp_request - free cqp request
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ *
+ * Dynamically allocated requests are kfree()'d. Other requests have their
+ * per-use state reset and go back on the available list. Waiters on
+ * cqp->remove_wq are woken in both cases.
+ */
+void irdma_free_cqp_request(struct irdma_cqp *cqp,
+			    struct irdma_cqp_request *cqp_request)
+{
+	unsigned long flags;
+
+	if (!cqp_request->dynamic) {
+		cqp_request->request_done = false;
+		cqp_request->callback_fcn = NULL;
+		cqp_request->waiting = false;
+
+		spin_lock_irqsave(&cqp->req_lock, flags);
+		list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
+		spin_unlock_irqrestore(&cqp->req_lock, flags);
+	} else {
+		kfree(cqp_request);
+	}
+
+	wake_up(&cqp->remove_wq);
+}
+
+/**
+ * irdma_put_cqp_request - dec ref count and free if 0
+ * @cqp: cqp ptr
+ * @cqp_request: request whose reference is dropped
+ */
+void irdma_put_cqp_request(struct irdma_cqp *cqp,
+			   struct irdma_cqp_request *cqp_request)
+{
+	/* other references still outstanding: nothing to release yet */
+	if (!atomic_dec_and_test(&cqp_request->refcount))
+		return;
+
+	irdma_free_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_free_pending_cqp_request -free pending cqp request objs
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ *
+ * If the request has a waiter, it is marked done with an error and the
+ * waiter is woken. The function then waits, with a timeout, for the
+ * reference count to reach 1 before dropping its own reference.
+ */
+static void
+irdma_free_pending_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (cqp_request->waiting) {
+ /* complete the request with an error so the waiter stops waiting */
+ cqp_request->compl_info.error = true;
+ cqp_request->request_done = true;
+ wake_up(&cqp_request->waitq);
+ }
+ /*
+ * remove_wq is woken by irdma_free_cqp_request(). The timeout is 1000
+ * jiffies, so its wall-clock length depends on HZ.
+ * NOTE(review): confirm whether a fixed time (msecs_to_jiffies) was meant.
+ */
+ wait_event_timeout(cqp->remove_wq,
+ atomic_read(&cqp_request->refcount) == 1,
+ 1000);
+ irdma_put_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_cleanup_pending_cqp_op - clean-up cqp with no
+ * completions
+ * @rf: RDMA PCI function
+ *
+ * Releases every request still referenced from the CQP send ring (through
+ * the scratch array) and every command still queued on the device's
+ * cqp_cmd_head list.
+ */
+void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf)
+{
+	struct irdma_sc_dev *dev = &rf->sc_dev;
+	struct irdma_cqp *cqp = &rf->cqp;
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *pcmdinfo;
+	u32 i, pending_work, wqe_idx;
+
+	pending_work = IRDMA_RING_USED_QUANTA(cqp->sc_cqp.sq_ring);
+	wqe_idx = IRDMA_RING_CURRENT_TAIL(cqp->sc_cqp.sq_ring);
+	for (i = 0; i < pending_work; i++) {
+		/* the scratch value is the request pointer stored as an integer */
+		cqp_request = (struct irdma_cqp_request *)
+			      (uintptr_t)cqp->scratch_array[wqe_idx];
+		if (cqp_request)
+			irdma_free_pending_cqp_request(cqp, cqp_request);
+		wqe_idx = (wqe_idx + 1) % IRDMA_RING_SIZE(cqp->sc_cqp.sq_ring);
+	}
+
+	while (!list_empty(&dev->cqp_cmd_head)) {
+		/*
+		 * NOTE(review): the list entry is used directly as a
+		 * struct cqp_cmds_info, which presumably relies on the
+		 * list_head being its first member - confirm.
+		 */
+		pcmdinfo = (struct cqp_cmds_info *)
+			   irdma_remove_head(&dev->cqp_cmd_head);
+		/*
+		 * Test the list entry itself: a NULL check on the
+		 * container_of() result only works when 'info' sits at
+		 * offset 0 of struct irdma_cqp_request.
+		 */
+		if (!pcmdinfo)
+			break;
+		cqp_request =
+			container_of(pcmdinfo, struct irdma_cqp_request, info);
+		irdma_free_pending_cqp_request(cqp, cqp_request);
+	}
+}
+
+/**
+ * irdma_free_qp_worker - worker for freeing QP resources
+ * @work: ptr to work struct
+ *
+ * Drops the PD reference, frees the QP resources and finally drops the
+ * device use count.
+ */
+static void irdma_free_qp_worker(struct work_struct *work)
+{
+	struct irdma_qp *iwqp = container_of(work, struct irdma_qp, work);
+	struct irdma_device *iwdev = iwqp->iwdev;
+
+	irdma_rem_pdusecount(iwqp->iwpd, iwdev);
+	irdma_free_qp_rsrc(iwdev, iwqp, iwqp->ibqp.qp_num);
+	/*
+	 * Drop the device use count last: irdma_rem_devusecount() wakes
+	 * close_wq when the count reaches zero, so the device must not be
+	 * touched after it. This matches the order of the error path in
+	 * irdma_rem_ref().
+	 */
+	irdma_rem_devusecount(iwdev);
+}
+
+/**
+ * irdma_free_qp - callback after destroy cqp completes
+ * @cqp_request: cqp request for destroy qp
+ * @num: not used
+ *
+ * Defers the actual release of the QP to irdma_free_qp_worker() on the
+ * free_qp_wq workqueue.
+ */
+static void irdma_free_qp(struct irdma_cqp_request *cqp_request,
+			  u32 num)
+{
+	struct irdma_sc_qp *sc_qp = (struct irdma_sc_qp *)cqp_request->param;
+	struct irdma_qp *iwqp = (struct irdma_qp *)sc_qp->back_qp;
+
+	INIT_WORK(&iwqp->work, irdma_free_qp_worker);
+	queue_work(iwqp->iwdev->rf->free_qp_wq, &iwqp->work);
+}
+
+/**
+ * irdma_wait_event - wait for completion
+ * @rf: RDMA PCI function
+ * @cqp_request: cqp request to wait
+ *
+ * Waits in CQP_COMPL_WAIT_TIME_MS slices for the request to be marked done.
+ * After each expired slice the CQP progress check runs; once its count
+ * reaches CQP_TIMEOUT_THRESHOLD the wait is abandoned and a reset is
+ * requested unless one is already flagged. The caller's reference on
+ * @cqp_request is dropped before returning.
+ *
+ * Return: 0 on success, -ETIME on timeout, -EPROTO if the command completed
+ * with an error.
+ */
+static int irdma_wait_event(struct irdma_pci_f *rf,
+			    struct irdma_cqp_request *cqp_request)
+{
+	struct irdma_cqp_timeout cqp_timeout = {};
+	struct cqp_cmds_info *info = &cqp_request->info;
+	struct irdma_cqp *iwcqp = &rf->cqp;
+	int err_code = 0;
+
+	cqp_timeout.compl_cqp_cmds =
+		rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
+
+	for (;;) {
+		up(&iwcqp->cqp_compl_sem);
+		if (wait_event_timeout(cqp_request->waitq,
+				       cqp_request->request_done,
+				       msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
+			break;
+
+		rf->sc_dev.cqp_ops->check_cqp_progress(&cqp_timeout,
+						       &rf->sc_dev);
+
+		/* keep waiting until the threshold is reached */
+		if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD)
+			continue;
+
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "error cqp command 0x%x timed out",
+			    info->cqp_cmd);
+		err_code = -ETIME;
+		if (!rf->reset) {
+			rf->reset = true;
+			irdma_request_reset(rf);
+		}
+		goto done;
+	}
+
+	if (cqp_request->compl_info.error) {
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "error cqp command 0x%x completion maj = 0x%x min=0x%x\n",
+			    info->cqp_cmd,
+			    cqp_request->compl_info.maj_err_code,
+			    cqp_request->compl_info.min_err_code);
+		err_code = -EPROTO;
+	}
+
+done:
+	irdma_put_cqp_request(iwcqp, cqp_request);
+
+	return err_code;
+}
+
+/**
+ * irdma_handle_cqp_op - process cqp command
+ * @rf: RDMA PCI function
+ * @cqp_request: cqp request to process
+ *
+ * Submits the command and, for a waiting request, blocks until it completes
+ * or times out. If a reset is flagged or submission fails, the request is
+ * freed here.
+ *
+ * Return: 0 on success, the submission error, or IRDMA_ERR_CQP_COMPL_ERROR
+ * when a reset is pending or the wait failed.
+ */
+enum irdma_status_code irdma_handle_cqp_op(struct irdma_pci_f *rf,
+					   struct irdma_cqp_request
+					   *cqp_request)
+{
+	struct cqp_cmds_info *info = &cqp_request->info;
+	enum irdma_status_code status;
+
+	if (rf->reset) {
+		irdma_free_cqp_request(&rf->cqp, cqp_request);
+		return IRDMA_ERR_CQP_COMPL_ERROR;
+	}
+
+	status = irdma_process_cqp_cmd(&rf->sc_dev, info);
+	if (status) {
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "error cqp command 0x%x failed\n", info->cqp_cmd);
+		irdma_free_cqp_request(&rf->cqp, cqp_request);
+		return status;
+	}
+
+	/* fire-and-forget requests are finished once submitted */
+	if (!cqp_request->waiting)
+		return status;
+
+	if (irdma_wait_event(rf, cqp_request))
+		return IRDMA_ERR_CQP_COMPL_ERROR;
+
+	return status;
+}
+
+/**
+ * irdma_add_devusecount - add dev refcount
+ * @iwdev: dev for refcount
+ *
+ * Atomically increments iwdev->use_count; the matching decrement is
+ * irdma_rem_devusecount().
+ */
+void irdma_add_devusecount(struct irdma_device *iwdev)
+{
+ atomic64_inc(&iwdev->use_count);
+}
+
+/**
+ * irdma_rem_devusecount - decrement refcount for dev
+ * @iwdev: device
+ *
+ * Wakes waiters on iwdev->close_wq when the count drops to zero.
+ */
+void irdma_rem_devusecount(struct irdma_device *iwdev)
+{
+	if (atomic64_dec_and_test(&iwdev->use_count))
+		wake_up(&iwdev->close_wq);
+}
+
+/**
+ * irdma_add_pdusecount - add pd refcount
+ * @iwpd: pd for refcount
+ *
+ * Atomically increments iwpd->usecount; the matching decrement is
+ * irdma_rem_pdusecount().
+ */
+void irdma_add_pdusecount(struct irdma_pd *iwpd)
+{
+ atomic_inc(&iwpd->usecount);
+}
+
+/**
+ * irdma_rem_pdusecount - decrement refcount for pd and free if 0
+ * @iwpd: pd for refcount
+ * @iwdev: iwarp device
+ *
+ * When the last reference goes away, the PD id is returned to the
+ * allocated_pds resource map.
+ */
+void irdma_rem_pdusecount(struct irdma_pd *iwpd,
+			  struct irdma_device *iwdev)
+{
+	if (atomic_dec_and_test(&iwpd->usecount))
+		irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds,
+				iwpd->sc_pd.pd_id);
+}
+
+/**
+ * irdma_add_ref - add refcount for qp
+ * @ibqp: iwarp qp
+ *
+ * Atomically increments the refcount of the driver QP that embeds @ibqp.
+ */
+void irdma_add_ref(struct ib_qp *ibqp)
+{
+	/*
+	 * Use to_iwqp(), as irdma_rem_ref() does, rather than a raw cast,
+	 * which is only correct if ibqp is the first member of irdma_qp.
+	 */
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+
+	atomic_inc(&iwqp->refcount);
+}
+
+/**
+ * irdma_rem_ref - rem refcount for qp and free if 0
+ * @ibqp: iwarp qp
+ *
+ * Drops one reference. When it was the last one, the QP is removed from the
+ * qp_table and a non-waiting QP destroy command is issued; on completion,
+ * irdma_free_qp() releases the remaining resources. If the destroy command
+ * cannot be issued, the resources are released here instead.
+ */
+void irdma_rem_ref(struct ib_qp *ibqp)
+{
+	struct irdma_qp *iwqp;
+	enum irdma_status_code status;
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *cqp_info;
+	struct irdma_device *iwdev;
+	u32 qp_num;
+	unsigned long flags;
+
+	iwqp = to_iwqp(ibqp);
+	iwdev = iwqp->iwdev;
+	spin_lock_irqsave(&iwdev->rf->qptable_lock, flags);
+	if (!atomic_dec_and_test(&iwqp->refcount)) {
+		spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+		return;
+	}
+
+	qp_num = iwqp->ibqp.qp_num;
+	iwdev->rf->qp_table[qp_num] = NULL;
+	spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+	cqp_request = irdma_get_cqp_request(&iwdev->rf->cqp, false);
+	/*
+	 * Without a request the destroy cannot be posted. Release the
+	 * software resources anyway, as the failure path below does;
+	 * returning here would leak the PD reference, the QP resources and
+	 * the device use count.
+	 * NOTE(review): confirm this is safe with respect to HW QP state.
+	 */
+	if (!cqp_request)
+		goto free_rsrc;
+
+	cqp_request->callback_fcn = irdma_free_qp;
+	cqp_request->param = (void *)&iwqp->sc_qp;
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = IRDMA_OP_QP_DESTROY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
+	cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+	status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+	if (!status)
+		return;
+
+	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+		    "CQP-OP Destroy QP fail");
+free_rsrc:
+	irdma_rem_pdusecount(iwqp->iwpd, iwdev);
+	irdma_free_qp_rsrc(iwdev, iwqp, qp_num);
+	irdma_rem_devusecount(iwdev);
+}
+
+/**
+ * irdma_get_qp - get qp address
+ * @device: iwarp device
+ * @qpn: qp number
+ *
+ * Return: the ib_qp registered under @qpn, or NULL if @qpn is out of range
+ * or no QP is currently stored in that slot.
+ */
+struct ib_qp *irdma_get_qp(struct ib_device *device,
+			   int qpn)
+{
+	struct irdma_device *iwdev = to_iwdev(device);
+	struct irdma_qp *iwqp;
+
+	if (qpn < IW_FIRST_QPN || qpn >= iwdev->rf->max_qp)
+		return NULL;
+
+	/*
+	 * Slots are cleared to NULL in irdma_rem_ref(), so test the entry
+	 * before taking the address of a member of it.
+	 */
+	iwqp = iwdev->rf->qp_table[qpn];
+	if (!iwqp)
+		return NULL;
+
+	return &iwqp->ibqp;
+}
+
+/**
+ * irdma_get_hw_addr - return hw addr
+ * @par: points to shared dev (a struct irdma_sc_dev)
+ *
+ * Return: the hw_addr stored in the device's hw structure.
+ */
+u8 __iomem *irdma_get_hw_addr(void *par)
+{
+	return ((struct irdma_sc_dev *)par)->hw->hw_addr;
+}
+
+/**
+ * irdma_remove_head - return head entry and remove from list
+ * @list: list for entry
+ *
+ * Return: the first list_head on @list after unlinking it, or NULL when the
+ * list is empty.
+ */
+void *irdma_remove_head(struct list_head *list)
+{
+	struct list_head *first;
+
+	if (list_empty(list))
+		return NULL;
+
+	first = list->next;
+	list_del(first);
+
+	return first;
+}
+
+/**
+ * irdma_allocate_dma_mem - Memory alloc helper fn
+ * @hw: pointer to the HW structure
+ * @mem: ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to (ALIGN() expects a power of 2)
+ *
+ * Rounds @size up to @alignment and allocates that much coherent DMA memory
+ * with GFP_KERNEL, recording address, DMA address and size in @mem.
+ *
+ * Return: 0 on success, IRDMA_ERR_PARAM if @mem is NULL,
+ * IRDMA_ERR_INVALID_SIZE if the aligned size does not fit in @mem->size,
+ * IRDMA_ERR_NO_MEMORY if the allocation fails.
+ */
+enum irdma_status_code irdma_allocate_dma_mem(struct irdma_hw *hw,
+					      struct irdma_dma_mem *mem,
+					      u64 size,
+					      u32 alignment)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+	u64 aligned_size;
+
+	if (!mem)
+		return IRDMA_ERR_PARAM;
+
+	/*
+	 * mem->size is only 32 bits wide; refuse requests that would be
+	 * silently truncated and end up with a smaller buffer than asked for.
+	 */
+	aligned_size = ALIGN(size, alignment);
+	if (aligned_size > U32_MAX)
+		return IRDMA_ERR_INVALID_SIZE;
+
+	mem->size = aligned_size;
+	mem->va = dma_alloc_coherent(&pcidev->dev, mem->size,
+				     (dma_addr_t *)&mem->pa, GFP_KERNEL);
+	if (!mem->va)
+		return IRDMA_ERR_NO_MEMORY;
+
+	return 0;
+}
+
+/**
+ * irdma_free_dma_mem - free coherent DMA memory described by @mem
+ * @hw: pointer to the HW structure; dev_context is used as the pci_dev
+ * @mem: ptr to mem struct to free; nothing is done if it or its va is NULL
+ */
+void irdma_free_dma_mem(struct irdma_hw *hw,
+ struct irdma_dma_mem *mem)
+{
+ struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+
+ if (!mem || !mem->va)
+ return;
+
+ dma_free_coherent(&pcidev->dev, mem->size,
+ mem->va, (dma_addr_t)mem->pa);
+
+ mem->va = NULL; /* makes a repeated free of the same @mem a no-op */
+}
+
+/**
+ * irdma_allocate_virt_mem - allocate zeroed memory with kzalloc(GFP_ATOMIC)
+ * @hw: pointer to the HW structure (not used by this function)
+ * @mem: ptr to mem struct to fill out (size and va are written)
+ * @size: size of memory requested, in bytes
+ */
+enum irdma_status_code irdma_allocate_virt_mem(struct irdma_hw *hw,
+ struct irdma_virt_mem *mem,
+ u32 size)
+{
+ if (!mem)
+ return IRDMA_ERR_PARAM;
+
+ mem->size = size;
+ mem->va = kzalloc(size, GFP_ATOMIC);
+
+ if (mem->va)
+ return 0;
+ else
+ return IRDMA_ERR_NO_MEMORY;
+}
+
+/**
+ * irdma_free_virt_mem - kfree the buffer held in @mem
+ * @hw: pointer to the HW structure (not used by this function)
+ * @mem: ptr to mem struct to free; va is freed but not reset to NULL
+ */
+enum irdma_status_code irdma_free_virt_mem(struct irdma_hw *hw,
+ struct irdma_virt_mem *mem)
+{
+ if (!mem)
+ return IRDMA_ERR_PARAM;
+ kfree(mem->va);
+ return 0;
+}
+
+/**
+ * irdma_cqp_sds_cmd - create cqp command for sd
+ * @dev: hardware control device structure
+ * @sdinfo: information for sd cqp; copied into the cqp request
+ * Return: status of irdma_handle_cqp_op(), or IRDMA_ERR_NO_MEMORY
+ */
+enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *sdinfo)
+{
+ enum irdma_status_code status;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev->back_dev;
+
+ cqp_request = irdma_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
+ sizeof(cqp_info->in.u.update_pe_sds.info));
+ cqp_info->cqp_cmd = IRDMA_OP_UPDATE_PE_SDS;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.update_pe_sds.dev = dev;
+ cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP-OP Update SD's fail");
+
+ return status;
+}
+
+/**
+ * irdma_qp_suspend_resume - cqp command for suspend/resume
+ * @qp: hardware control qp; its dev supplies the cqp and the PCI function
+ * @suspend: true selects IRDMA_OP_SUSPEND, false selects IRDMA_OP_RESUME
+ */
+enum irdma_status_code irdma_qp_suspend_resume(struct irdma_sc_qp *qp,
+ bool suspend)
+{
+ struct irdma_sc_dev *dev = qp->dev;
+ enum irdma_status_code status;
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_sc_cqp *cqp = dev->cqp;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev->back_dev;
+
+ cqp_request = irdma_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = (suspend) ? IRDMA_OP_SUSPEND : IRDMA_OP_RESUME;
+ cqp_info->in.u.suspend_resume.cqp = cqp;
+ cqp_info->in.u.suspend_resume.qp = qp;
+ cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR,
+ "CQP-OP QP Suspend/Resume fail");
+
+ return status;
+}
+
+/**
+ * irdma_term_modify_qp - modify qp for term message
+ * @qp: hardware control qp; its back_qp is the irdma_qp that is modified
+ * @next_state: qp's next state
+ * @term: terminate code
+ * @term_len: length handed on together with the terminate code
+ */
+void irdma_term_modify_qp(struct irdma_sc_qp *qp,
+ u8 next_state,
+ u8 term,
+ u8 term_len)
+{
+ struct irdma_qp *iwqp;
+
+ iwqp = (struct irdma_qp *)qp->back_qp;
+ irdma_next_iw_state(iwqp, next_state, 0, term, term_len);
+}
+
+/**
+ * irdma_terminate_done - after terminate is completed
+ * @qp: hardware control qp; IRDMA_TERM_DONE is set in its term_flags
+ * @timeout_occurred: non-zero if the terminate timer expired
+ */
+void irdma_terminate_done(struct irdma_sc_qp *qp,
+ int timeout_occurred)
+{
+ struct irdma_qp *iwqp;
+ u32 next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ u8 hte = 0;
+ bool first_time;
+ unsigned long flags;
+
+ iwqp = (struct irdma_qp *)qp->back_qp;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->hte_added) {
+ iwqp->hte_added = 0;
+ hte = 1;
+ }
+ first_time = !(qp->term_flags & IRDMA_TERM_DONE);
+ qp->term_flags |= IRDMA_TERM_DONE;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (first_time) { /* only the first caller changes the qp state */
+ if (!timeout_occurred)
+ irdma_terminate_del_timer(qp);
+ else
+ next_iwarp_state = IRDMA_QP_STATE_CLOSING;
+
+ irdma_next_iw_state(iwqp, next_iwarp_state, hte, 0, 0);
+ irdma_cm_disconn(iwqp);
+ }
+}
+
+static void irdma_terminate_timeout(struct timer_list *t)
+{
+ struct irdma_qp *iwqp = from_timer(iwqp, t, terminate_timer);
+ struct irdma_sc_qp *qp = (struct irdma_sc_qp *)&iwqp->sc_qp;
+
+ irdma_terminate_done(qp, 1); /* 1: the terminate timer expired */
+ irdma_rem_ref(&iwqp->ibqp); /* pairs with irdma_add_ref() at timer start */
+}
+
+/**
+ * irdma_terminate_start_timer - start terminate timeout (expires after HZ)
+ * @qp: hardware control qp; a reference is taken on its ibqp for the timer
+ */
+void irdma_terminate_start_timer(struct irdma_sc_qp *qp)
+{
+ struct irdma_qp *iwqp;
+
+ iwqp = (struct irdma_qp *)qp->back_qp;
+ irdma_add_ref(&iwqp->ibqp);
+ timer_setup(&iwqp->terminate_timer, irdma_terminate_timeout, 0);
+ iwqp->terminate_timer.expires = jiffies + HZ;
+
+ add_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * irdma_terminate_del_timer - delete terminate timeout
+ * @qp: hardware control qp; its ibqp ref is dropped if del_timer() != 0
+ */
+void irdma_terminate_del_timer(struct irdma_sc_qp *qp)
+{
+ struct irdma_qp *iwqp;
+ int ret;
+
+ iwqp = (struct irdma_qp *)qp->back_qp;
+ ret = del_timer(&iwqp->terminate_timer);
+ if (ret)
+ irdma_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * irdma_cqp_query_fpm_val_cmd - send cqp command for fpm
+ * @dev: hardware control device structure
+ * @val_mem: buffer for fpm; its pa and va are placed in the request
+ * @hmc_fn_id: function id for fpm
+ */
+enum irdma_status_code
+irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem,
+ u8 hmc_fn_id)
+{
+ enum irdma_status_code status;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev->back_dev;
+
+ cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ cqp_request->param = NULL;
+ cqp_info->in.u.query_fpm_val.cqp = dev->cqp;
+ cqp_info->in.u.query_fpm_val.fpm_val_pa = val_mem->pa;
+ cqp_info->in.u.query_fpm_val.fpm_val_va = val_mem->va;
+ cqp_info->in.u.query_fpm_val.hmc_fn_id = hmc_fn_id;
+ cqp_info->cqp_cmd = IRDMA_OP_QUERY_FPM_VAL;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.query_fpm_val.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP-OP Query FPM fail");
+
+ return status;
+}
+
+/**
+ * irdma_cqp_commit_fpm_val_cmd - commit fpm values in hw
+ * @dev: hardware control device structure
+ * @val_mem: buffer with fpm values; its pa and va are placed in the request
+ * @hmc_fn_id: function id for fpm
+ */
+enum irdma_status_code
+irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *val_mem,
+ u8 hmc_fn_id)
+{
+ enum irdma_status_code status;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev->back_dev;
+
+ cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ cqp_request->param = NULL;
+ cqp_info->in.u.commit_fpm_val.cqp = dev->cqp;
+ cqp_info->in.u.commit_fpm_val.fpm_val_pa = val_mem->pa;
+ cqp_info->in.u.commit_fpm_val.fpm_val_va = val_mem->va;
+ cqp_info->in.u.commit_fpm_val.hmc_fn_id = hmc_fn_id;
+ cqp_info->cqp_cmd = IRDMA_OP_COMMIT_FPM_VAL;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.commit_fpm_val.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP-OP Commit FPM fail");
+
+ return status;
+}
+
+/**
+ * irdma_cqp_cq_create_cmd - issue IRDMA_OP_CQ_CREATE through the cqp
+ * @dev: device pointer; back_dev is the RDMA PCI function that is used
+ * @cq: pointer to the cq to create
+ */
+enum irdma_status_code
+irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ struct irdma_pci_f *rf = dev->back_dev;
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ enum irdma_status_code status;
+
+ cqp_request = irdma_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP-OP Create CQ fail");
+
+ return status;
+}
+
+/**
+ * irdma_cqp_qp_create_cmd - issue IRDMA_OP_QP_CREATE through the cqp
+ * @dev: device pointer; back_dev is the RDMA PCI function that is used
+ * @qp: pointer to the qp to create; requested next state is RTS
+ */
+enum irdma_status_code irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev,
+ struct irdma_sc_qp *qp)
+{
+ struct irdma_pci_f *rf = dev->back_dev;
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_create_qp_info *qp_info;
+ enum irdma_status_code status;
+
+ cqp_request = irdma_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+ memset(qp_info, 0, sizeof(*qp_info));
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = IRDMA_QP_STATE_RTS;
+ cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP-OP QP create fail");
+
+ return status;
+}
+
+/**
+ * irdma_dealloc_push_page - free a push page for qp
+ * @rf: RDMA PCI function
+ * @qp: hardware control qp; push_idx is marked invalid on success
+ */
+static void irdma_dealloc_push_page(struct irdma_pci_f *rf,
+ struct irdma_sc_qp *qp)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ enum irdma_status_code status;
+
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX)
+ return;
+
+ cqp_request = irdma_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
+ cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
+ cqp_info->in.u.manage_push_page.info.free_page = 1;
+ cqp_info->in.u.manage_push_page.cqp = &rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (!status)
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+ else
+ irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+ "CQP-OP dealloc Push page fail");
+}
+
+/**
+ * irdma_free_qp_rsrc - free up memory resources for qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @qp_num: qp number assigned; released from allocated_qps only if > 2
+ */
+void irdma_free_qp_rsrc(struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ u32 qp_num)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
+ irdma_dealloc_push_page(rf, &iwqp->sc_qp);
+ if (iwqp->sc_qp.vsi)
+ iwqp->sc_qp.dev->ws_remove(iwqp->sc_qp.vsi, iwqp->sc_qp.user_pri);
+
+ if (qp_num > 2)
+ irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+ irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->q2_ctx_mem);
+ irdma_free_dma_mem(rf->sc_dev.hw, &iwqp->kqp.dma_mem);
+ kfree(iwqp->kqp.wrid_mem);
+ iwqp->kqp.wrid_mem = NULL;
+ kfree(iwqp->allocated_buf);
+}
+
+/**
+ * irdma_cq_wq_destroy - send cq destroy cqp
+ * @rf: RDMA PCI function whose cqp carries the request
+ * @cq: hardware control cq to destroy; a failure is only logged
+ */
+void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
+{
+ enum irdma_status_code status;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_destroy.cq = cq;
+ cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+ "CQP-OP Destroy CQ fail");
+}
+
+/**
+ * irdma_hw_modify_qp_callback - handle state for modifyQPs that don't wait
+ * @cqp_request: modify QP completion; its qp_modify.qp identifies the qp
+ * @num: not used
+ */
+static void irdma_hw_modify_qp_callback(struct irdma_cqp_request *cqp_request,
+ u32 num)
+{
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_qp *iwqp;
+
+ cqp_info = &cqp_request->info;
+ iwqp = cqp_info->in.u.qp_modify.qp->back_qp;
+ atomic_dec(&iwqp->hw_mod_qp_pend); /* incremented when the request was posted */
+ wake_up(&iwqp->mod_qp_waitq);
+}
+
+/**
+ * irdma_hw_modify_qp - setup cqp for modify qp
+ * @iwdev: irdma device; its rf is the RDMA PCI function that is used
+ * @iwqp: qp ptr (user or kernel)
+ * @info: info for modify qp; copied into the cqp request
+ * @wait: flag to wait or not for modify qp completion
+ */
+enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_modify_qp_info *info,
+ bool wait)
+{
+ enum irdma_status_code status;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_modify_qp_info *m_info;
+
+ cqp_request = irdma_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ if (!wait) {
+ cqp_request->callback_fcn = irdma_hw_modify_qp_callback;
+ atomic_inc(&iwqp->hw_mod_qp_pend);
+ }
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.qp_modify.info;
+ memcpy(m_info, info, sizeof(*m_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status) {
+ if (rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1))
+ return status;
+ switch (m_info->next_iwarp_state) {
+ struct irdma_gen_ae_info ae_info;
+
+ case IRDMA_QP_STATE_RTS:
+ case IRDMA_QP_STATE_IDLE:
+ case IRDMA_QP_STATE_TERMINATE:
+ case IRDMA_QP_STATE_CLOSING:
+ if (info->curr_iwarp_state == IRDMA_QP_STATE_IDLE)
+ irdma_send_reset(iwqp->cm_node);
+ else
+ iwqp->sc_qp.term_flags = IRDMA_TERM_DONE;
+ if (!wait) {
+ ae_info.ae_code = IRDMA_AE_BAD_CLOSE;
+ ae_info.ae_src = 0;
+ irdma_gen_ae(rf, &iwqp->sc_qp, &ae_info, false);
+ } else {
+ cqp_request = irdma_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.qp_modify.info;
+ memcpy(m_info, info, sizeof(*m_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+ m_info->next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ m_info->reset_tcp_conn = true;
+ irdma_handle_cqp_op(rf, cqp_request); /* result of the retry is ignored */
+ }
+ break;
+ case IRDMA_QP_STATE_ERROR:
+ default:
+ break;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * irdma_cqp_cq_destroy_cmd - destroy the cqp cq via irdma_cq_wq_destroy()
+ * @dev: device pointer; back_dev is the RDMA PCI function that is used
+ * @cq: pointer to cq
+ */
+void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ struct irdma_pci_f *rf = dev->back_dev;
+
+ irdma_cq_wq_destroy(rf, cq);
+}
+
+/**
+ * irdma_cqp_qp_destroy_cmd - destroy a qp through the cqp
+ * @dev: device pointer; back_dev is the RDMA PCI function that is used
+ * @qp: pointer to qp; a failure is only logged
+ */
+void irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_pci_f *rf = dev->back_dev;
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ enum irdma_status_code status;
+
+ cqp_request = irdma_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ memset(cqp_info, 0, sizeof(*cqp_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_destroy.qp = qp;
+ cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP QP_DESTROY fail");
+}
+
+/**
+ * irdma_ieq_mpa_crc_ae - generate AE for crc error
+ * @dev: hardware control device structure
+ * @qp: hardware control qp the AE is generated for (source is the RQ)
+ */
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev,
+ struct irdma_sc_qp *qp)
+{
+ struct irdma_gen_ae_info info = {};
+ struct irdma_pci_f *rf = dev->back_dev;
+
+ irdma_debug(dev, IRDMA_DEBUG_AEQ, "Generate MPA CRC AE\n");
+ info.ae_code = IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR;
+ info.ae_src = IRDMA_AE_SOURCE_RQ;
+ irdma_gen_ae(rf, qp, &info, false);
+}
+
+/**
+ * irdma_init_hash_desc - initialize hash for crc calculation
+ * @desc: receives the allocated crc32c shash descriptor on success
+ */
+enum irdma_status_code irdma_init_hash_desc(struct shash_desc **desc)
+{
+ struct crypto_shash *tfm;
+ struct shash_desc *tdesc;
+
+ tfm = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(tfm))
+ return IRDMA_ERR_MPA_CRC;
+
+ tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm), GFP_KERNEL);
+ if (!tdesc) {
+ crypto_free_shash(tfm);
+ return IRDMA_ERR_MPA_CRC;
+ }
+
+ tdesc->tfm = tfm;
+ *desc = tdesc;
+
+ return 0;
+}
+
+/**
+ * irdma_free_hash_desc - free hash desc
+ * @desc: descriptor to be freed together with its tfm; may be NULL
+ */
+void irdma_free_hash_desc(struct shash_desc *desc)
+{
+ if (desc) {
+ crypto_free_shash(desc->tfm);
+ kfree(desc);
+ }
+}
+
+/**
+ * irdma_ieq_check_mpacrc - check if mpa crc is OK
+ * @desc: desc for hash
+ * @addr: address of buffer for crc
+ * @len: length of buffer
+ * @val: expected crc; a mismatch or a hash failure gives IRDMA_ERR_MPA_CRC
+ */
+enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc,
+ void *addr,
+ u32 len,
+ u32 val)
+{
+ u32 crc = 0;
+ int ret;
+
+ ret = crypto_shash_init(desc);
+ if (!ret)
+ ret = crypto_shash_update(desc, addr, len);
+ if (!ret)
+ ret = crypto_shash_final(desc, (u8 *)&crc);
+ if (ret || crc != val) { /* a failed hash must not match val == 0 */
+ irdma_pr_err("mpa crc check fail");
+ return IRDMA_ERR_MPA_CRC;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_ieq_get_qp - get qp based on quad in puda buffer
+ * @dev: hardware control device structure
+ * @buf: receive puda buffer on exception q; iph and tcph must be set
+ */
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_cm_node *cm_node;
+ struct irdma_device *iwdev = buf->vsi->back_vsi;
+ u32 loc_addr[4] = {};
+ u32 rem_addr[4] = {};
+ u16 loc_port, rem_port;
+ struct ipv6hdr *ip6h;
+ struct iphdr *iph = (struct iphdr *)buf->iph;
+ struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+ struct irdma_pci_f *rf = dev->back_dev;
+ struct udphdr *udph;
+ struct irdma_bth *bth;
+
+ if (iph->protocol == IPPROTO_UDP) {
+ udph = (struct udphdr *)tcph;
+ bth = (struct irdma_bth *)(udph + 1); /* BTH starts right after the UDP header */
+ iwqp = rf->qp_table[be32_to_cpu(bth->qpn)];
+ return &iwqp->sc_qp;
+ }
+
+ if (iph->version == 4) {
+ loc_addr[0] = ntohl(iph->daddr);
+ rem_addr[0] = ntohl(iph->saddr);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ irdma_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
+ }
+ loc_port = ntohs(tcph->dest);
+ rem_port = ntohs(tcph->source);
+ cm_node = irdma_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
+ loc_addr, false, true);
+ if (!cm_node)
+ return NULL;
+
+ iwqp = cm_node->iwqp;
+
+ return &iwqp->sc_qp;
+}
+
+/**
+ * irdma_send_ieq_ack - ACKs for duplicate or OOO partials FPDUs
+ * @qp: qp ptr; pfpdu.lastrcv_buf supplies the TCP header that is read
+ */
+void irdma_send_ieq_ack(struct irdma_sc_qp *qp)
+{
+ struct irdma_cm_node *cm_node = ((struct irdma_qp *)qp->back_qp)->cm_node;
+ struct irdma_puda_buf *buf = qp->pfpdu.lastrcv_buf;
+ struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+ cm_node->tcp_cntxt.rcv_nxt = qp->pfpdu.nextseqnum;
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+
+ irdma_send_ack(cm_node);
+}
+
+/**
+ * irdma_puda_ieq_get_ah_info - get AH info from IEQ buffer
+ * @qp: qp pointer; pfpdu.ah_buf is the buffer that is parsed
+ * @ah_info: AH info pointer; zeroed first, then filled from the buffer
+ */
+void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp,
+ struct irdma_ah_info *ah_info)
+{
+ struct irdma_puda_buf *buf = qp->pfpdu.ah_buf;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+
+ memset(ah_info, 0, sizeof(*ah_info));
+ ah_info->do_lpbk = true;
+ ah_info->vlan_tag = buf->vlan_id;
+ ah_info->insert_vlan_tag = buf->vlan_valid;
+ ah_info->ipv4_valid = buf->ipv4;
+ ah_info->vsi = qp->vsi;
+
+ if (buf->smac_valid)
+ ether_addr_copy(ah_info->mac_addr, buf->smac);
+
+ if (buf->ipv4) {
+ ah_info->ipv4_valid = true; /* already set from buf->ipv4 above */
+ iph = (struct iphdr *)buf->iph;
+ ah_info->hop_ttl = iph->ttl;
+ ah_info->tc_tos = iph->tos;
+ ah_info->dest_ip_addr[0] = ntohl(iph->daddr);
+ ah_info->src_ip_addr[0] = ntohl(iph->saddr);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ ah_info->hop_ttl = ip6h->hop_limit;
+ ah_info->tc_tos = ip6h->priority;
+ irdma_copy_ip_ntohl(ah_info->dest_ip_addr,
+ ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(ah_info->src_ip_addr,
+ ip6h->saddr.in6_u.u6_addr32);
+ }
+
+ ah_info->dst_arpindex = irdma_arp_table(qp->dev->back_dev,
+ ah_info->dest_ip_addr,
+ ah_info->ipv4_valid,
+ NULL, IRDMA_ARP_RESOLVE);
+}
+
+/**
+ * irdma_gen1_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @len: length added to the TCP and IP header lengths to form tot_len
+ * @seqnum: seq number for tcp
+ */
+static void irdma_gen1_ieq_update_tcpip_info(struct irdma_puda_buf *buf,
+ u16 len,
+ u32 seqnum)
+{
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ u16 iphlen;
+ u16 pktsize;
+ u8 *addr = (u8 *)buf->mem.va;
+
+ iphlen = (buf->ipv4) ? 20 : 40;
+ iph = (struct iphdr *)(addr + buf->maclen);
+ tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
+ pktsize = len + buf->tcphlen + iphlen;
+ iph->tot_len = htons(pktsize); /* NOTE(review): iphdr field written for IPv6 too - confirm */
+ tcph->seq = htonl(seqnum);
+}
+
+/**
+ * irdma_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @len: length of buffer (only used on the GEN_1 path)
+ * @seqnum: seq number for tcp
+ */
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf,
+ u16 len,
+ u32 seqnum)
+{
+ struct tcphdr *tcph;
+ u8 *addr;
+
+ if (buf->vsi->dev->hw_attrs.hw_rev == IRDMA_GEN_1) {
+ irdma_gen1_ieq_update_tcpip_info(buf, len, seqnum);
+ return;
+ }
+
+ addr = (u8 *)buf->mem.va;
+ tcph = (struct tcphdr *)addr;
+ tcph->seq = htonl(seqnum);
+}
+
+/**
+ * irdma_gen1_puda_get_tcpip_info - get tcpip info from puda
+ * buffer (header pointers and lengths are recorded in @buf)
+ * @info: to get information; l3proto and payload_len are read
+ * @buf: puda buffer; mem.va is parsed starting at the ethernet header
+ */
+static enum irdma_status_code
+irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
+{
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ u16 iphlen;
+ u16 pkt_len;
+ u8 *mem = (u8 *)buf->mem.va;
+ struct ethhdr *ethh = (struct ethhdr *)buf->mem.va;
+
+ if (ethh->h_proto == htons(0x8100)) {
+ info->vlan_valid = true;
+ buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) &
+ VLAN_VID_MASK;
+ }
+
+ buf->maclen = (info->vlan_valid) ? 18 : 14;
+ iphlen = (info->l3proto) ? 40 : 20;
+ buf->ipv4 = (info->l3proto) ? false : true;
+ buf->iph = mem + buf->maclen;
+ iph = (struct iphdr *)buf->iph;
+ buf->tcph = buf->iph + iphlen;
+ tcph = (struct tcphdr *)buf->tcph;
+
+ if (buf->ipv4) {
+ pkt_len = ntohs(iph->tot_len);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ pkt_len = ntohs(ip6h->payload_len) + iphlen;
+ }
+
+ buf->totallen = pkt_len + buf->maclen;
+
+ if (info->payload_len < buf->totallen) { /* headers claim more than was received */
+ irdma_debug(buf->vsi->dev, IRDMA_DEBUG_ERR,
+ "payload_len = 0x%x totallen expected0x%x\n",
+ info->payload_len, buf->totallen);
+ return IRDMA_ERR_INVALID_SIZE;
+ }
+
+ buf->tcphlen = (tcph->doff) << 2;
+ buf->datalen = pkt_len - iphlen - buf->tcphlen;
+ buf->data = (buf->datalen) ? buf->tcph + buf->tcphlen : NULL;
+ buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
+ buf->seqnum = ntohl(tcph->seq);
+
+ return 0;
+}
+
+/**
+ * irdma_puda_get_tcpip_info - get tcpip info from puda buffer
+ * @info: to get information; vlan, ipv4, payload_len and smac are read
+ * @buf: puda buffer; GEN_1 hardware is handled by the gen1 variant
+ */
+enum irdma_status_code
+irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
+{
+ struct tcphdr *tcph;
+ u32 pkt_len;
+ u8 *mem;
+
+ if (buf->vsi->dev->hw_attrs.hw_rev == IRDMA_GEN_1)
+ return irdma_gen1_puda_get_tcpip_info(info, buf);
+
+ mem = (u8 *)buf->mem.va;
+ buf->vlan_valid = info->vlan_valid;
+ if (info->vlan_valid)
+ buf->vlan_id = info->vlan;
+
+ buf->ipv4 = info->ipv4;
+ if (buf->ipv4)
+ buf->iph = mem + 20;
+ else
+ buf->iph = mem;
+
+ buf->tcph = mem + IRDMA_TCP_OFFSET;
+ tcph = (struct tcphdr *)buf->tcph;
+ pkt_len = info->payload_len;
+ buf->totallen = pkt_len;
+ buf->tcphlen = (tcph->doff) << 2;
+ buf->datalen = pkt_len - IRDMA_TCP_OFFSET - buf->tcphlen; /* NOTE(review): no underflow check */
+ buf->data = (buf->datalen) ? buf->tcph + buf->tcphlen : NULL;
+ buf->hdrlen = IRDMA_TCP_OFFSET + buf->tcphlen;
+ buf->seqnum = ntohl(tcph->seq);
+
+ if (info->smac_valid) {
+ ether_addr_copy(buf->smac, info->smac);
+ buf->smac_valid = true;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_process_stats - Checking for wrap and update stats
+ *
+ * @pestat: stats structure pointer; its gather buffers feed irdma_update_stats()
+ */
+static void irdma_process_stats(struct irdma_vsi_pestat *pestat)
+{
+ struct irdma_gather_stats *gather_stats =
+ pestat->gather_info.gather_stats;
+ struct irdma_gather_stats *last_gather_stats =
+ pestat->gather_info.last_gather_stats;
+ irdma_update_stats(&pestat->hw_stats, gather_stats, last_gather_stats);
+}
+
+/**
+ * irdma_cqp_gather_stats_gen1 - Gather stats by reading the HW stat registers
+ * @dev: pointer to device structure holding the stat register offsets
+ * @pestat: pointer to stats info; gather_info.gather_stats is filled
+ */
+static void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat)
+{
+ struct irdma_gather_stats *gather_stats = pestat->gather_info.gather_stats;
+ u32 stats_inst_offset_32;
+ u32 stats_inst_offset_64;
+
+ stats_inst_offset_32 = (pestat->gather_info.use_stats_inst) ?
+ pestat->gather_info.stats_inst_index : pestat->hw->hmc.hmc_fn_id;
+ stats_inst_offset_32 *= 4;
+ stats_inst_offset_64 = stats_inst_offset_32 * 2;
+
+ gather_stats->rxvlanerr =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_RXVLANERR]
+ + stats_inst_offset_32);
+ gather_stats->ip4rxdiscard =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_IP4RXDISCARD]
+ + stats_inst_offset_32);
+ gather_stats->ip4rxtrunc =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_IP4RXTRUNC]
+ + stats_inst_offset_32);
+ gather_stats->ip4txnoroute =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_IP4TXNOROUTE]
+ + stats_inst_offset_32);
+ gather_stats->ip6rxdiscard =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_IP6RXDISCARD]
+ + stats_inst_offset_32);
+ gather_stats->ip6rxtrunc =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_IP6RXTRUNC]
+ + stats_inst_offset_32);
+ gather_stats->ip6txnoroute =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_IP6TXNOROUTE]
+ + stats_inst_offset_32);
+ gather_stats->tcprtxseg =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_TCPRTXSEG]
+ + stats_inst_offset_32);
+ gather_stats->tcprxopterr =
+ rd32(dev->hw,
+ dev->hw_stats_regs_32[IRDMA_HW_STAT_INDEX_TCPRXOPTERR]
+ + stats_inst_offset_32);
+
+ gather_stats->ip4rxocts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4RXOCTS]
+ + stats_inst_offset_64);
+ gather_stats->ip4rxpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4RXPKTS]
+ + stats_inst_offset_64);
+ gather_stats->ip4rxfrags =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4RXFRAGS]
+ + stats_inst_offset_64);
+ gather_stats->ip4rxmcpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4RXMCPKTS]
+ + stats_inst_offset_64);
+ gather_stats->ip4txocts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4TXOCTS]
+ + stats_inst_offset_64);
+ gather_stats->ip4txpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4TXPKTS]
+ + stats_inst_offset_64);
+ gather_stats->ip4txfrag =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4TXFRAGS]
+ + stats_inst_offset_64);
+ gather_stats->ip4txmcpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP4TXMCPKTS]
+ + stats_inst_offset_64);
+ gather_stats->ip6rxocts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6RXOCTS]
+ + stats_inst_offset_64);
+ gather_stats->ip6rxpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6RXPKTS]
+ + stats_inst_offset_64);
+ gather_stats->ip6rxfrags =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6RXFRAGS]
+ + stats_inst_offset_64);
+ gather_stats->ip6rxmcpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6RXMCPKTS]
+ + stats_inst_offset_64);
+ gather_stats->ip6txocts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6TXOCTS]
+ + stats_inst_offset_64);
+ gather_stats->ip6txpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6TXPKTS]
+ + stats_inst_offset_64);
+ gather_stats->ip6txfrags =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6TXFRAGS]
+ + stats_inst_offset_64);
+ gather_stats->ip6txmcpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_IP6TXMCPKTS]
+ + stats_inst_offset_64);
+ gather_stats->tcprxsegs =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_TCPRXSEGS]
+ + stats_inst_offset_64);
+ gather_stats->tcptxsegs =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_TCPTXSEG]
+ + stats_inst_offset_64);
+ gather_stats->rdmarxrds =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMARXRDS]
+ + stats_inst_offset_64);
+ gather_stats->rdmarxsnds =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMARXSNDS]
+ + stats_inst_offset_64);
+ gather_stats->rdmarxwrs =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMARXWRS]
+ + stats_inst_offset_64);
+ gather_stats->rdmatxrds =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMATXRDS]
+ + stats_inst_offset_64);
+ gather_stats->rdmatxsnds =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMATXSNDS]
+ + stats_inst_offset_64);
+ gather_stats->rdmatxwrs =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMATXWRS]
+ + stats_inst_offset_64);
+ gather_stats->rdmavbn =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMAVBND]
+ + stats_inst_offset_64);
+ gather_stats->rdmavinv =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_RDMAVINV]
+ + stats_inst_offset_64);
+ gather_stats->udprxpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_UDPRXPKTS]
+ + stats_inst_offset_64);
+ gather_stats->udptxpkts =
+ rd64(dev->hw,
+ dev->hw_stats_regs_64[IRDMA_HW_STAT_INDEX_UDPTXPKTS]
+ + stats_inst_offset_64);
+
+ irdma_process_stats(pestat);
+}
+
+/**
+ * irdma_process_cqp_stats - Checking for wrap and update stats
+ * @cqp_request: cqp_request structure pointer; param holds the pestat
+ * @unused: unused param of callback
+ */
+static void irdma_process_cqp_stats(struct irdma_cqp_request *cqp_request,
+ u32 unused)
+{
+ struct irdma_vsi_pestat *pestat = cqp_request->param;
+
+ irdma_process_stats(pestat);
+}
+
+/**
+ * irdma_cqp_gather_stats_cmd - Gather stats
+ * @dev: pointer to device structure
+ * @pestat: pointer to stats info; gather_info is copied into the request
+ * @wait: true to process the stats here, false to process them in a callback
+ */
+enum irdma_status_code irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat,
+ bool wait)
+{
+ struct irdma_pci_f *rf = dev->back_dev;
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ enum irdma_status_code status;
+
+ cqp_request = irdma_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return IRDMA_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ memset(cqp_info, 0, sizeof(*cqp_info));
+ cqp_info->cqp_cmd = IRDMA_OP_STATS_GATHER;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.stats_gather.info = pestat->gather_info;
+ cqp_info->in.u.stats_gather.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.stats_gather.cqp = &rf->cqp.sc_cqp;
+ cqp_request->param = pestat;
+ if (!wait)
+ cqp_request->callback_fcn = irdma_process_cqp_stats;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP STATS_GATHER fail");
+ else if (wait)
+ irdma_process_stats(pestat);
+
+ return status;
+}
+
+/**
+ * irdma_hw_stats_timeout - periodic timer handler that refreshes HW stats
+ * @t: timer_list pointer embedded in the vsi pestat
+ *
+ * Stats are gathered only while the device is registered and not closing;
+ * GEN_1 hardware uses the gen1 gather routine, other revisions post a
+ * non-waiting CQP gather command. The timer is re-armed for
+ * STATS_TIMER_DELAY milliseconds in every case.
+ */
+static void irdma_hw_stats_timeout(struct timer_list *t)
+{
+	struct irdma_vsi_pestat *pestat = from_timer(pestat, t, stats_timer);
+	struct irdma_sc_vsi *vsi = pestat->vsi;
+	struct irdma_device *iwdev = vsi->back_vsi;
+
+	if (!iwdev->closing && iwdev->init_state == RDMA_DEV_REGISTERED) {
+		if (vsi->dev->hw_attrs.hw_rev == IRDMA_GEN_1)
+			irdma_cqp_gather_stats_gen1(vsi->dev, vsi->pestat);
+		else
+			irdma_cqp_gather_stats_cmd(vsi->dev, vsi->pestat,
+						   false);
+	}
+
+	mod_timer(&pestat->stats_timer,
+		  jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * irdma_hw_stats_start_timer - set up and arm the periodic stats timer
+ * @vsi: vsi structure pointer; the timer lives in vsi->pestat
+ *
+ * The first expiry is STATS_TIMER_DELAY milliseconds from now.
+ */
+void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi)
+{
+	struct timer_list *timer = &vsi->pestat->stats_timer;
+
+	timer_setup(timer, irdma_hw_stats_timeout, 0);
+	mod_timer(timer, jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * irdma_hw_stats_stop_timer - Delete periodic stats timer
+ * @vsi: pointer to vsi structure; the timer lives in vsi->pestat
+ *
+ * Uses del_timer_sync() on the stats timer.
+ */
+void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi)
+{
+	del_timer_sync(&vsi->pestat->stats_timer);
+}
+
+/**
+ * irdma_cqp_stats_inst_cmd - Allocate/free stats instance
+ * @vsi: pointer to vsi structure
+ * @cmd: CQP opcode to post; IRDMA_OP_STATS_ALLOCATE is issued as a
+ *	 waiting request, any other opcode as a non-waiting one
+ * @stats_info: pointer to allocate stats info; after a successful waiting
+ *		request its stats_idx is set from the completion's op_ret_val
+ *
+ * Returns IRDMA_ERR_NO_MEMORY when no cqp request can be obtained,
+ * otherwise the status of irdma_handle_cqp_op().
+ */
+enum irdma_status_code
+irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi,
+			 u8 cmd,
+			 struct irdma_stats_inst_info *stats_info)
+{
+	struct irdma_pci_f *rf = vsi->dev->back_dev;
+	struct irdma_cqp *iwcqp = &rf->cqp;
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *cqp_info;
+	enum irdma_status_code status;
+	bool wait = (cmd == IRDMA_OP_STATS_ALLOCATE);
+
+	cqp_request = irdma_get_cqp_request(iwcqp, wait);
+	if (!cqp_request)
+		return IRDMA_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	memset(cqp_info, 0, sizeof(*cqp_info));
+	cqp_info->cqp_cmd = cmd;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.stats_manage.info = *stats_info;
+	cqp_info->in.u.stats_manage.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.stats_manage.cqp = &rf->cqp.sc_cqp;
+	status = irdma_handle_cqp_op(rf, cqp_request);
+	if (status)
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "CQP MANAGE_STATS fail\n");
+	else if (wait)
+		stats_info->stats_idx = cqp_request->compl_info.op_ret_val;
+
+	return status;
+}
+
+/**
+ * irdma_cqp_ceq_cmd - Create/Destroy CEQ's after CEQ 0
+ * @dev: pointer to device info
+ * @sc_ceq: pointer to ceq structure
+ * @op: Create or Destroy
+ *
+ * Posts @op as a waiting CQP request. Returns IRDMA_ERR_NO_MEMORY when
+ * no cqp request can be obtained, otherwise the status of
+ * irdma_handle_cqp_op().
+ */
+enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev,
+					 struct irdma_sc_ceq *sc_ceq,
+					 u8 op)
+{
+	struct irdma_pci_f *rf = dev->back_dev;
+	struct irdma_cqp_request *req;
+	struct cqp_cmds_info *info;
+
+	req = irdma_get_cqp_request(&rf->cqp, true);
+	if (!req)
+		return IRDMA_ERR_NO_MEMORY;
+
+	info = &req->info;
+	info->cqp_cmd = op;
+	info->post_sq = 1;
+	info->in.u.ceq_create.scratch = (uintptr_t)req;
+	info->in.u.ceq_create.ceq = sc_ceq;
+
+	return irdma_handle_cqp_op(rf, req);
+}
+
+/**
+ * irdma_cqp_ws_node_cmd - Add/modify/delete ws node
+ * @dev: pointer to device structure
+ * @cmd: Add, modify or delete
+ * @node_info: pointer to ws node info; qs_handle is updated from the
+ *	       completion's op_ret_val
+ *
+ * While the CEQ is not valid the completion is polled for; otherwise the
+ * request is issued as a waiting request and its stored completion is used.
+ *
+ * Returns IRDMA_ERR_NO_MEMORY when no cqp request can be obtained, the
+ * irdma_handle_cqp_op() status on failure, or the poll status when polling.
+ */
+enum irdma_status_code
+irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev,
+		      u8 cmd,
+		      struct irdma_ws_node_info *node_info)
+{
+	struct irdma_pci_f *rf = dev->back_dev;
+	struct irdma_cqp *iwcqp = &rf->cqp;
+	struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp;
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *cqp_info;
+	enum irdma_status_code status;
+	bool poll = !rf->sc_dev.ceq_valid;
+
+	cqp_request = irdma_get_cqp_request(iwcqp, !poll);
+	if (!cqp_request)
+		return IRDMA_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	memset(cqp_info, 0, sizeof(*cqp_info));
+	cqp_info->cqp_cmd = cmd;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.ws_node.info = *node_info;
+	cqp_info->in.u.ws_node.cqp = cqp;
+	cqp_info->in.u.ws_node.scratch = (uintptr_t)cqp_request;
+	status = irdma_handle_cqp_op(rf, cqp_request);
+	if (status) {
+		irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP WS_NODE fail\n");
+		return status;
+	}
+
+	if (poll) {
+		/*
+		 * Zeroed up front: the fields below are read even when the
+		 * poll returns an error, so they must never be stack garbage.
+		 */
+		struct irdma_ccq_cqe_info compl_info = {};
+
+		status = cqp->dev->cqp_ops->poll_for_cqp_op_done(cqp,
+								 IRDMA_CQP_OP_WORK_SCHED_NODE,
+								 &compl_info);
+		node_info->qs_handle = compl_info.op_ret_val;
+		irdma_debug(cqp->dev, IRDMA_DEBUG_DCB,
+			    "opcode=%d, compl_info.retval=%d\n",
+			    compl_info.op_code, compl_info.op_ret_val);
+	} else {
+		node_info->qs_handle = cqp_request->compl_info.op_ret_val;
+	}
+
+	return status;
+}
+
+/**
+ * irdma_cqp_up_map_cmd - Set the up-up mapping
+ * @dev: pointer to device structure
+ * @cmd: CQP opcode to post
+ * @map_info: pointer to up map info (copied into the request)
+ *
+ * Posts a non-waiting CQP request. Returns IRDMA_ERR_NO_MEMORY when no
+ * cqp request can be obtained, otherwise the status of
+ * irdma_handle_cqp_op().
+ */
+enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev,
+					    u8 cmd,
+					    struct irdma_up_info *map_info)
+{
+	struct irdma_pci_f *rf = dev->back_dev;
+	struct irdma_cqp *iwcqp = &rf->cqp;
+	struct irdma_cqp_request *req;
+	struct cqp_cmds_info *info;
+	enum irdma_status_code ret;
+
+	req = irdma_get_cqp_request(iwcqp, false);
+	if (!req)
+		return IRDMA_ERR_NO_MEMORY;
+
+	info = &req->info;
+	memset(info, 0, sizeof(*info));
+	info->cqp_cmd = cmd;
+	info->post_sq = 1;
+	info->in.u.up_map.info = *map_info;
+	info->in.u.up_map.cqp = &iwcqp->sc_cqp;
+	info->in.u.up_map.scratch = (uintptr_t)req;
+
+	ret = irdma_handle_cqp_op(rf, req);
+	if (ret)
+		irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP UP MAP fail\n");
+
+	return ret;
+}
+
+/**
+ * irdma_ah_cqp_op - perform an AH cqp operation
+ * @rf: RDMA PCI function
+ * @sc_ah: address handle
+ * @cmd: AH operation, IRDMA_OP_AH_CREATE or IRDMA_OP_AH_DESTROY
+ * @wait: wait if true
+ * @callback_fcn: Callback function on CQP op completion (used when !wait)
+ * @cb_param: parameter for callback function (used when !wait)
+ *
+ * On a successful waiting request the device use count and
+ * sc_ah->ah_info.ah_valid are updated here; for non-waiting requests
+ * that is left to @callback_fcn.
+ *
+ * returns errno: -EINVAL for an unsupported @cmd, -ENOMEM when no cqp
+ * request is available or the CQP op fails, 0 on success
+ */
+int irdma_ah_cqp_op(struct irdma_pci_f *rf,
+		    struct irdma_sc_ah *sc_ah,
+		    u8 cmd,
+		    bool wait,
+		    void (*callback_fcn)(struct irdma_cqp_request*, u32),
+		    void *cb_param)
+{
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *cqp_info;
+	struct irdma_device *iwdev;
+	enum irdma_status_code status;
+
+	/*
+	 * Reject unsupported opcodes before taking a cqp request so the
+	 * -EINVAL path does not leave an obtained request behind.
+	 */
+	if (cmd != IRDMA_OP_AH_CREATE && cmd != IRDMA_OP_AH_DESTROY)
+		return -EINVAL;
+
+	cqp_request = irdma_get_cqp_request(&rf->cqp, wait);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = cmd;
+	cqp_info->post_sq = 1;
+	if (cmd == IRDMA_OP_AH_CREATE) {
+		cqp_info->in.u.ah_create.info = sc_ah->ah_info;
+		cqp_info->in.u.ah_create.scratch = (uintptr_t)cqp_request;
+		cqp_info->in.u.ah_create.cqp = &rf->cqp.sc_cqp;
+	} else {
+		cqp_info->in.u.ah_destroy.info = sc_ah->ah_info;
+		cqp_info->in.u.ah_destroy.scratch = (uintptr_t)cqp_request;
+		cqp_info->in.u.ah_destroy.cqp = &rf->cqp.sc_cqp;
+	}
+
+	if (!wait) {
+		cqp_request->callback_fcn = callback_fcn;
+		cqp_request->param = cb_param;
+	}
+
+	status = irdma_handle_cqp_op(rf, cqp_request);
+	if (status)
+		return -ENOMEM;
+
+	if (wait) {
+		iwdev = sc_ah->ah_info.vsi->back_vsi;
+		if (cmd == IRDMA_OP_AH_CREATE) {
+			irdma_add_devusecount(iwdev);
+			sc_ah->ah_info.ah_valid = true;
+		} else {
+			irdma_rem_devusecount(iwdev);
+			sc_ah->ah_info.ah_valid = false;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_ieq_ah_cb - callback after creation of AH for IEQ
+ * @cqp_request: pointer to cqp_request of create AH; param is the sc qp
+ * @unused: unused param
+ *
+ * Runs under qp->pfpdu.lock. A zero op_ret_val marks the AH valid, takes
+ * a device use count and processes the qp's fpdus; a non-zero value marks
+ * the AH invalid and cleans the qp up in the IEQ.
+ */
+static void irdma_ieq_ah_cb(struct irdma_cqp_request *cqp_request,
+			    u32 unused)
+{
+	struct irdma_sc_qp *qp = cqp_request->param;
+	struct irdma_sc_ah *ah = qp->pfpdu.ah;
+	struct irdma_device *iwdev = qp->vsi->back_vsi;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->pfpdu.lock, flags);
+	if (cqp_request->compl_info.op_ret_val) {
+		ah->ah_info.ah_valid = false;
+		irdma_ieq_cleanup_qp(qp->vsi->ieq, qp);
+	} else {
+		ah->ah_info.ah_valid = true;
+		irdma_add_devusecount(iwdev);
+		irdma_ieq_process_fpdus(qp, qp->vsi->ieq);
+	}
+	spin_unlock_irqrestore(&qp->pfpdu.lock, flags);
+}
+
+/**
+ * irdma_ilq_ah_cb - callback after creation of AH for ILQ
+ * @cqp_request: pointer to cqp_request of create AH; param is the cm node
+ * @unused: unused param
+ *
+ * A zero op_ret_val marks the cm node's AH valid and takes a device use
+ * count; any other value marks the AH invalid.
+ */
+static void irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request,
+			    u32 unused)
+{
+	struct irdma_cm_node *node = cqp_request->param;
+	struct irdma_sc_ah *ah = node->ah;
+	struct irdma_device *iwdev = ah->ah_info.vsi->back_vsi;
+
+	if (cqp_request->compl_info.op_ret_val) {
+		ah->ah_info.ah_valid = false;
+		return;
+	}
+
+	ah->ah_info.ah_valid = true;
+	irdma_add_devusecount(iwdev);
+}
+
+/**
+ * irdma_puda_create_ah - create AH for ILQ/IEQ qp's
+ * @dev: device pointer
+ * @ah_info: Address handle info; ah_idx is filled in by the allocator
+ * @wait: When true will wait for operation to complete
+ * @type: ILQ/IEQ, selects the completion callback
+ * @cb_param: Callback param when not waiting
+ * @ah_ret: Returned pointer to address handle if created, NULL on failure
+ *
+ * Returns 0 on success and IRDMA_ERR_NO_MEMORY on any failure.
+ */
+enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev,
+					    struct irdma_ah_info *ah_info,
+					    bool wait,
+					    enum puda_rsrc_type type,
+					    void *cb_param,
+					    struct irdma_sc_ah **ah_ret)
+{
+	struct irdma_pci_f *rf = dev->back_dev;
+	void (*cb)(struct irdma_cqp_request *, u32);
+	struct irdma_sc_ah *new_ah;
+
+	new_ah = kzalloc(sizeof(*new_ah), GFP_ATOMIC);
+	*ah_ret = new_ah;
+	if (!new_ah)
+		return IRDMA_ERR_NO_MEMORY;
+
+	if (irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah,
+			     &ah_info->ah_idx, &rf->next_ah))
+		goto err_free;
+
+	new_ah->dev = dev;
+	new_ah->ah_info = *ah_info;
+
+	cb = (type == IRDMA_PUDA_RSRC_TYPE_ILQ) ? irdma_ilq_ah_cb :
+						  irdma_ieq_ah_cb;
+	if (!irdma_ah_cqp_op(rf, new_ah, IRDMA_OP_AH_CREATE, wait, cb,
+			     cb_param))
+		return 0;
+
+	/* CQP op failed: give the AH index back before freeing the handle */
+	irdma_free_rsrc(rf, rf->allocated_ahs, new_ah->ah_info.ah_idx);
+err_free:
+	kfree(new_ah);
+	*ah_ret = NULL;
+	return IRDMA_ERR_NO_MEMORY;
+}
+
+/**
+ * irdma_puda_free_ah - free a puda address handle
+ * @dev: device pointer
+ * @ah: The address handle to free, may be NULL
+ *
+ * A valid AH is destroyed with a non-waiting CQP op, its index is
+ * released and the device use count dropped; the handle memory is
+ * freed in all cases.
+ */
+void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah)
+{
+	struct irdma_pci_f *rf = dev->back_dev;
+
+	if (ah && ah->ah_info.ah_valid) {
+		irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_DESTROY, false, NULL, NULL);
+		irdma_free_rsrc(rf, rf->allocated_ahs, ah->ah_info.ah_idx);
+		irdma_rem_devusecount(ah->ah_info.vsi->back_vsi);
+	}
+
+	/* kfree() accepts NULL, so the NULL-handle case ends up here too */
+	kfree(ah);
+}
+
+/**
+ * irdma_gsi_ud_qp_ah_cb - callback after creation of AH for GSI/UD QP
+ * @cqp_request: pointer to cqp_request of create AH; param is the sc ah
+ * @unused: unused param
+ *
+ * The AH is marked valid exactly when the completion's op_ret_val is zero.
+ */
+void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request, u32 unused)
+{
+	struct irdma_sc_ah *ah = cqp_request->param;
+
+	ah->ah_info.ah_valid = !cqp_request->compl_info.op_ret_val;
+}
+
+/**
+ * irdma_destroy_ah_cb - callback after destroy of AH
+ * @cqp_request: pointer to cqp_request of destroy AH; param is the irdma_ah
+ * @unused: unused param
+ *
+ * The AH index is released only when op_ret_val is zero; the pd use
+ * count is dropped and the irdma_ah is freed regardless.
+ */
+void irdma_destroy_ah_cb(struct irdma_cqp_request *cqp_request, u32 unused)
+{
+	struct irdma_ah *iwah = cqp_request->param;
+	struct irdma_device *iwdev = iwah->sc_ah.ah_info.vsi->back_vsi;
+	bool destroyed = !cqp_request->compl_info.op_ret_val;
+
+	if (destroyed)
+		irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs,
+				iwah->sc_ah.ah_info.ah_idx);
+
+	irdma_rem_pdusecount(iwah->pd, iwdev);
+	kfree(iwah);
+}
+
+/**
+ * irdma_prm_add_pble_mem - add memory to pble resources
+ * @pprm: pble resource manager
+ * @pchunk: chunk of memory to add
+ *
+ * Allocates and clears the chunk's allocation bitmap (one bit per
+ * 1 << pble_shift bytes of the chunk) and adds the chunk's pbles to the
+ * manager's total and free counts.
+ *
+ * Returns IRDMA_ERR_PARAM if the chunk size is not a multiple of 4096,
+ * the irdma_allocate_virt_mem() status on allocation failure, else 0.
+ */
+enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+					      struct irdma_chunk *pchunk)
+{
+	u64 sizeofbitmap;
+	enum irdma_status_code ret_code;
+
+	if (pchunk->size & 0xfff)
+		return IRDMA_ERR_PARAM;
+
+	sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift;
+
+	/*
+	 * The bitmap helpers access the buffer in whole unsigned longs, so
+	 * size it in longs; sizeofbitmap >> 3 bytes is too small whenever
+	 * the bit count is not a multiple of BITS_PER_LONG.
+	 */
+	ret_code = irdma_allocate_virt_mem(pchunk->dev->hw,
+					   &pchunk->bitmapmem,
+					   BITS_TO_LONGS(sizeofbitmap) *
+					   sizeof(unsigned long));
+	if (ret_code)
+		return ret_code;
+
+	pchunk->bitmapbuf = pchunk->bitmapmem.va;
+	bitmap_zero(pchunk->bitmapbuf, sizeofbitmap);
+
+	pchunk->sizeofbitmap = sizeofbitmap;
+	/* each pble is 8 bytes hence shift by 3 */
+	pprm->total_pble_alloc += pchunk->size >> 3;
+	pprm->free_pble_cnt += pchunk->size >> 3;
+
+	return 0;
+}
+
+/**
+ * irdma_prm_get_pbles - get pble's from prm
+ * @pprm: pble resource manager
+ * @chunkinfo: return information about chunk where pble's were acquired
+ * @mem_size: size of pble memory needed
+ * @vaddr: returns virtual address of pble memory (0 on failure)
+ * @fpm_addr: returns fpm address of pble memory (0 on failure)
+ *
+ * Walks the manager's chunk list for the first chunk whose bitmap has a
+ * large enough free area, marks that area used and reports its location.
+ *
+ * Returns IRDMA_ERR_NO_MEMORY when no chunk can satisfy the request, else 0.
+ */
+enum irdma_status_code irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+					   struct irdma_pble_chunkinfo *chunkinfo,
+					   u32 mem_size,
+					   u64 *vaddr,
+					   u64 *fpm_addr)
+{
+	struct list_head *chunk_entry = pprm->clist.next;
+	struct irdma_chunk *pchunk = NULL;
+	u64 bit_idx = PBLE_INVALID_IDX;
+	u64 bits_needed;
+	unsigned long flags;
+	u32 offset;
+
+	*vaddr = 0;
+	*fpm_addr = 0;
+
+	/* round the request up to whole 1 << pble_shift sized units */
+	bits_needed = (mem_size + (1 << pprm->pble_shift) - 1) >> pprm->pble_shift;
+
+	spin_lock_irqsave(&pprm->prm_lock, flags);
+	for (; chunk_entry != &pprm->clist; chunk_entry = pchunk->list.next) {
+		/*
+		 * NOTE(review): this cast presumably relies on 'list' being
+		 * the first member of struct irdma_chunk - confirm.
+		 */
+		pchunk = (struct irdma_chunk *)chunk_entry;
+		bit_idx = bitmap_find_next_zero_area(pchunk->bitmapbuf,
+						     pchunk->sizeofbitmap,
+						     0,
+						     bits_needed,
+						     0);
+		if (bit_idx < pchunk->sizeofbitmap)
+			break;
+	}
+
+	/* empty list, or the last chunk examined had no room either */
+	if (!pchunk || bit_idx >= pchunk->sizeofbitmap) {
+		spin_unlock_irqrestore(&pprm->prm_lock, flags);
+		return IRDMA_ERR_NO_MEMORY;
+	}
+
+	bitmap_set(pchunk->bitmapbuf, bit_idx, bits_needed);
+
+	chunkinfo->pchunk = pchunk;
+	chunkinfo->bit_idx = bit_idx;
+	chunkinfo->bits_used = bits_needed;
+
+	offset = bit_idx << pprm->pble_shift;
+	*vaddr = (u64)((u8 *)pchunk->vaddr + offset);
+	*fpm_addr = pchunk->fpm_addr + offset;
+
+	/* 3 is sizeof pble divide */
+	pprm->free_pble_cnt -= chunkinfo->bits_used << (pprm->pble_shift - 3);
+	spin_unlock_irqrestore(&pprm->prm_lock, flags);
+
+	return 0;
+}
+
+/**
+ * irdma_prm_return_pbles - return pbles back to prm
+ * @pprm: pble resource manager
+ * @chunkinfo: chunk where pble's were acquired and to be freed
+ *
+ * Under prm_lock, clears the bits recorded in @chunkinfo in the owning
+ * chunk's bitmap and adds the corresponding pbles back to the free count.
+ */
+void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
+			    struct irdma_pble_chunkinfo *chunkinfo)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&pprm->prm_lock, irq_flags);
+	bitmap_clear(chunkinfo->pchunk->bitmapbuf,
+		     chunkinfo->bit_idx, chunkinfo->bits_used);
+	pprm->free_pble_cnt += chunkinfo->bits_used << (pprm->pble_shift - 3);
+	spin_unlock_irqrestore(&pprm->prm_lock, irq_flags);
+}
+
+/**
+ * irdma_pble_free_paged_mem - free virtual paged memory back to system
+ * @chunk: chunk to free with paged memory
+ *
+ * Unmaps the chunk->pg_cnt DMA-mapped pages, frees the DMA address array
+ * and the vmalloc'ed buffer, and clears the chunk's vaddr and type.
+ */
+void irdma_pble_free_paged_mem(struct irdma_chunk *chunk)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)chunk->dev->hw->dev_context;
+	int pg;
+
+	/* nothing is unmapped when pg_cnt is zero */
+	for (pg = 0; pg < chunk->pg_cnt; pg++)
+		dma_unmap_page(&pcidev->dev, chunk->dmainfo.dmaaddrs[pg],
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	kfree(chunk->dmainfo.dmaaddrs);
+	chunk->dmainfo.dmaaddrs = NULL;
+	vfree((void *)chunk->vaddr);
+	chunk->vaddr = 0;
+	chunk->type = 0;
+}
+
+/**
+ * irdma_pble_get_paged_mem - allocate paged memory for pbles
+ * @chunk: chunk to add for paged memory
+ * @pg_cnt: number of pages needed
+ *
+ * vmalloc()s @pg_cnt pages and DMA-maps each of them, recording the DMA
+ * addresses in chunk->dmainfo.dmaaddrs. If any page cannot be looked up
+ * or mapped, the pages mapped so far are released again through
+ * irdma_pble_free_paged_mem().
+ *
+ * Returns 0 on success and IRDMA_ERR_NO_MEMORY on any failure.
+ */
+enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk, int pg_cnt)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)chunk->dev->hw->dev_context;
+	struct page *page;
+	u8 *addr;
+	u32 size;
+	int i;
+
+	/*
+	 * kcalloc() checks the count * size multiplication for overflow and
+	 * sizes the array by its element type instead of a hard-coded
+	 * 8 bytes per entry.
+	 */
+	chunk->dmainfo.dmaaddrs = kcalloc(pg_cnt,
+					  sizeof(*chunk->dmainfo.dmaaddrs),
+					  GFP_KERNEL);
+	if (!chunk->dmainfo.dmaaddrs)
+		return IRDMA_ERR_NO_MEMORY;
+
+	size = PAGE_SIZE * pg_cnt;
+	chunk->vaddr = (u64)vmalloc(size);
+	if (!chunk->vaddr) {
+		kfree(chunk->dmainfo.dmaaddrs);
+		chunk->dmainfo.dmaaddrs = NULL;
+		return IRDMA_ERR_NO_MEMORY;
+	}
+
+	chunk->size = size;
+	addr = (u8 *)chunk->vaddr;
+
+	for (i = 0; i < pg_cnt; i++) {
+		page = vmalloc_to_page((void *)addr);
+		if (!page)
+			break;
+
+		chunk->dmainfo.dmaaddrs[i] = dma_map_page(&pcidev->dev,
+							  page,
+							  0,
+							  PAGE_SIZE,
+							  DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&pcidev->dev, chunk->dmainfo.dmaaddrs[i]))
+			break;
+
+		addr += PAGE_SIZE;
+	}
+
+	/* i is the number of pages mapped; the free path unmaps that many */
+	chunk->pg_cnt = i;
+	chunk->type = PBLE_SD_PAGED;
+	if (i == pg_cnt)
+		return 0;
+
+	irdma_pble_free_paged_mem(chunk);
+
+	return IRDMA_ERR_NO_MEMORY;
+}
+
+/**
+ * irdma_alloc_ws_node_id - Allocate a tx scheduler node ID
+ * @dev: device pointer
+ *
+ * Returns the allocated ID truncated to u16, or IRDMA_WS_NODE_INVALID
+ * when irdma_alloc_rsrc() fails.
+ */
+u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev)
+{
+	struct irdma_pci_f *rf = dev->back_dev;
+	u32 id;
+	int err;
+
+	err = irdma_alloc_rsrc(rf, rf->allocated_ws_nodes,
+			       rf->max_ws_node_id, &id,
+			       &rf->next_ws_node_id);
+	if (err)
+		return IRDMA_WS_NODE_INVALID;
+
+	return (u16)id;
+}
+
+/**
+ * irdma_free_ws_node_id - Free a tx scheduler node ID
+ * @dev: device pointer
+ * @node_id: node ID to release back to allocated_ws_nodes
+ */
+void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id)
+{
+	struct irdma_pci_f *pci_f = dev->back_dev;
+
+	irdma_free_rsrc(pci_f, pci_f->allocated_ws_nodes, (u32)node_id);
+}
+
+/**
+ * irdma_modify_qp_to_err - Modify a QP to error
+ * @sc_qp: qp structure
+ *
+ * Calls irdma_modify_qp() on the owning irdma_qp with only qp_state set
+ * (IB_QPS_ERR) and IB_QP_STATE as the attribute mask; the remaining
+ * attribute fields are left unset.
+ */
+void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp)
+{
+	struct ib_qp_attr err_attr;
+	struct irdma_qp *iwqp = sc_qp->back_qp;
+
+	err_attr.qp_state = IB_QPS_ERR;
+	irdma_modify_qp(&iwqp->ibqp, &err_attr, IB_QP_STATE, NULL);
+}
--
1.8.3.1
^ permalink raw reply related
* [RFC v1 16/19] RDMA/irdma: Add dynamic tracing for CM
From: Shiraz Saleem @ 2019-02-15 17:11 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Michael J. Ruhl, Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Add dynamic tracing functionality to debug connection
management issues.
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/hw/irdma/trace.c | 113 ++++++++
drivers/infiniband/hw/irdma/trace.h | 4 +
drivers/infiniband/hw/irdma/trace_cm.h | 459 +++++++++++++++++++++++++++++++++
3 files changed, 576 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/trace.c
create mode 100644 drivers/infiniband/hw/irdma/trace.h
create mode 100644 drivers/infiniband/hw/irdma/trace_cm.h
diff --git a/drivers/infiniband/hw/irdma/trace.c b/drivers/infiniband/hw/irdma/trace.c
new file mode 100644
index 0000000..0b6fbdb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2019, Intel Corporation. */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+/*
+ * print_ip_addr - format "address:port" into a trace_seq
+ * @p: trace sequence to append to
+ * @addr: address words; only the first word is used when @ipv4 is true
+ * @port: port, passed through ntohs() before printing
+ * @ipv4: selects %pI4 (after ntohl() of the first word) or %pI6
+ *
+ * The text is NUL-terminated inside the trace_seq. Returns a pointer to
+ * the start of the text that was just written.
+ */
+const char *print_ip_addr(struct trace_seq *p, u32 *addr, u16 port, bool ipv4)
+{
+	const char *start = trace_seq_buffer_ptr(p);
+
+	if (!ipv4) {
+		trace_seq_printf(p, "%pI6:%d", addr, ntohs(port));
+	} else {
+		u32 swapped = ntohl(*addr);
+
+		trace_seq_printf(p, "%pI4:%d", &swapped, ntohs(port));
+	}
+	trace_seq_putc(p, 0);
+
+	return start;
+}
+
+/*
+ * parse_iw_event_type - map an iw_cm event type to a printable name
+ * @iw_type: event type to translate
+ *
+ * Returns a constant string; "Unknown" for values not listed below.
+ */
+const char *parse_iw_event_type(enum iw_cm_event_type iw_type)
+{
+	const char *name = "Unknown";
+
+	switch (iw_type) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		name = "IwRequest";
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		name = "IwReply";
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		name = "IwEstablished";
+		break;
+	case IW_CM_EVENT_DISCONNECT:
+		name = "IwDisconnect";
+		break;
+	case IW_CM_EVENT_CLOSE:
+		name = "IwClose";
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * parse_cm_event_type - map an irdma CM event type to a printable name
+ * @cm_type: event type to translate
+ *
+ * Returns a constant string; IRDMA_CM_EVENT_UNKNOWN maps to "none" and
+ * values not listed below map to "Unknown".
+ */
+const char *parse_cm_event_type(enum irdma_cm_event_type cm_type)
+{
+	const char *name = "Unknown";
+
+	switch (cm_type) {
+	case IRDMA_CM_EVENT_ESTABLISHED:
+		name = "CmEstablished";
+		break;
+	case IRDMA_CM_EVENT_MPA_REQ:
+		name = "CmMPA_REQ";
+		break;
+	case IRDMA_CM_EVENT_MPA_CONNECT:
+		name = "CmMPA_CONNECT";
+		break;
+	case IRDMA_CM_EVENT_MPA_ACCEPT:
+		name = "CmMPA_ACCEPT";
+		break;
+	case IRDMA_CM_EVENT_MPA_REJECT:
+		name = "CmMPA_REJECT";
+		break;
+	case IRDMA_CM_EVENT_MPA_ESTABLISHED:
+		name = "CmMPA_ESTABLISHED";
+		break;
+	case IRDMA_CM_EVENT_CONNECTED:
+		name = "CmConnected";
+		break;
+	case IRDMA_CM_EVENT_RESET:
+		name = "CmReset";
+		break;
+	case IRDMA_CM_EVENT_ABORTED:
+		name = "CmAborted";
+		break;
+	case IRDMA_CM_EVENT_UNKNOWN:
+		name = "none";
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * parse_cm_state - map an irdma CM node state to a printable name
+ * @state: node state to translate
+ *
+ * Returns a constant string; "Bad state" for values not listed below.
+ */
+const char *parse_cm_state(enum irdma_cm_node_state state)
+{
+	const char *name = "Bad state";
+
+	switch (state) {
+	case IRDMA_CM_STATE_UNKNOWN:
+		name = "UNKNOWN";
+		break;
+	case IRDMA_CM_STATE_INITED:
+		name = "INITED";
+		break;
+	case IRDMA_CM_STATE_LISTENING:
+		name = "LISTENING";
+		break;
+	case IRDMA_CM_STATE_SYN_RCVD:
+		name = "SYN_RCVD";
+		break;
+	case IRDMA_CM_STATE_SYN_SENT:
+		name = "SYN_SENT";
+		break;
+	case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED:
+		name = "ONE_SIDE_ESTABLISHED";
+		break;
+	case IRDMA_CM_STATE_ESTABLISHED:
+		name = "ESTABLISHED";
+		break;
+	case IRDMA_CM_STATE_ACCEPTING:
+		name = "ACCEPTING";
+		break;
+	case IRDMA_CM_STATE_MPAREQ_SENT:
+		name = "MPAREQ_SENT";
+		break;
+	case IRDMA_CM_STATE_MPAREQ_RCVD:
+		name = "MPAREQ_RCVD";
+		break;
+	case IRDMA_CM_STATE_MPAREJ_RCVD:
+		name = "MPAREJ_RECVD";
+		break;
+	case IRDMA_CM_STATE_OFFLOADED:
+		name = "OFFLOADED";
+		break;
+	case IRDMA_CM_STATE_FIN_WAIT1:
+		name = "FIN_WAIT1";
+		break;
+	case IRDMA_CM_STATE_FIN_WAIT2:
+		name = "FIN_WAIT2";
+		break;
+	case IRDMA_CM_STATE_CLOSE_WAIT:
+		name = "CLOSE_WAIT";
+		break;
+	case IRDMA_CM_STATE_TIME_WAIT:
+		name = "TIME_WAIT";
+		break;
+	case IRDMA_CM_STATE_LAST_ACK:
+		name = "LAST_ACK";
+		break;
+	case IRDMA_CM_STATE_CLOSING:
+		name = "CLOSING";
+		break;
+	case IRDMA_CM_STATE_LISTENER_DESTROYED:
+		name = "LISTENER_DESTROYED";
+		break;
+	case IRDMA_CM_STATE_CLOSED:
+		name = "CLOSED";
+		break;
+	}
+
+	return name;
+}
diff --git a/drivers/infiniband/hw/irdma/trace.h b/drivers/infiniband/hw/irdma/trace.h
new file mode 100644
index 0000000..40cd65e
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "trace_cm.h"
diff --git a/drivers/infiniband/hw/irdma/trace_cm.h b/drivers/infiniband/hw/irdma/trace_cm.h
new file mode 100644
index 0000000..551d1f5
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace_cm.h
@@ -0,0 +1,459 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#if !defined(__TRACE_CM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRACE_CM_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "main.h"
+
+/* Formatting helpers implemented in trace.c and used by TP_printk() below */
+const char *print_ip_addr(struct trace_seq *p, u32 *addr, u16 port, bool ipv4);
+const char *parse_iw_event_type(enum iw_cm_event_type iw_type);
+const char *parse_cm_event_type(enum irdma_cm_event_type cm_type);
+const char *parse_cm_state(enum irdma_cm_node_state state);
+/* 'p' is not a macro argument; it must be in scope where this is expanded */
+#define __print_ip_addr(addr, port, ipv4) print_ip_addr(p, addr, port, ipv4)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM icrdma_cm
+
+/*
+ * Tracepoint irdma_create_listen: records the device pointer plus the
+ * local address, port and ipv4 flag taken from cm_info.
+ */
+TRACE_EVENT(irdma_create_listen,
+	    TP_PROTO(struct irdma_device *iwdev, struct irdma_cm_info *cm_info),
+	    TP_ARGS(iwdev, cm_info),
+	    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+			     __dynamic_array(u32, laddr, 4)
+			     __field(u16, lport)
+			     __field(bool, ipv4)
+		    ),
+	    TP_fast_assign(__entry->iwdev = iwdev;
+			   __entry->lport = cm_info->loc_port;
+			   __entry->ipv4 = cm_info->ipv4;
+			   /*
+			    * Copy all four words: %pI6 prints the whole array.
+			    * Assumes loc_addr holds at least 4 u32 - confirm.
+			    */
+			   memcpy(__get_dynamic_array(laddr),
+				  cm_info->loc_addr, 4 * sizeof(u32));
+		    ),
+	    TP_printk("iwdev=%p loc: %s",
+		      __entry->iwdev,
+		      __print_ip_addr(__get_dynamic_array(laddr),
+				      __entry->lport, __entry->ipv4)
+		    )
+);
+
+/*
+ * Tracepoint irdma_dec_refcnt_listen: records the listener's device,
+ * local address/port and the caller address.
+ *
+ * NOTE(review): the refcnt field is declared but never assigned or
+ * printed; the listener's counter field is not visible here, so it is
+ * left as is.
+ */
+TRACE_EVENT(irdma_dec_refcnt_listen,
+	    TP_PROTO(struct irdma_cm_listener *listener, void *caller),
+	    TP_ARGS(listener, caller),
+	    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+			     __field(u32, refcnt)
+			     __dynamic_array(u32, laddr, 4)
+			     __field(u16, lport)
+			     __field(bool, ipv4)
+			     __field(void *, caller)
+		    ),
+	    TP_fast_assign(__entry->iwdev = listener->iwdev;
+			   __entry->lport = listener->loc_port;
+			   __entry->ipv4 = listener->ipv4;
+			   /* printed with %pS below, so it must be recorded */
+			   __entry->caller = caller;
+			   /*
+			    * Copy all four words: %pI6 prints the whole array.
+			    * Assumes loc_addr holds at least 4 u32 - confirm.
+			    */
+			   memcpy(__get_dynamic_array(laddr),
+				  listener->loc_addr, 4 * sizeof(u32));
+		    ),
+	    TP_printk("iwdev=%p caller=%pS loc: %s",
+		      __entry->iwdev,
+		      __entry->caller,
+		      __print_ip_addr(__get_dynamic_array(laddr),
+				      __entry->lport, __entry->ipv4)
+		    )
+);
+
+/*
+ * Event class listener_template: records a listener's device, vlan id,
+ * state and local address/port. The state is stored but not printed.
+ */
+DECLARE_EVENT_CLASS(listener_template,
+		    TP_PROTO(struct irdma_cm_listener *listener),
+		    TP_ARGS(listener),
+		    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+				     __field(u16, lport)
+				     __field(u16, vlan_id)
+				     __field(bool, ipv4)
+				     __field(enum irdma_cm_listener_state,
+					     state)
+				     __dynamic_array(u32, laddr, 4)
+			    ),
+		    TP_fast_assign(__entry->iwdev = listener->iwdev;
+				   __entry->lport = listener->loc_port;
+				   __entry->vlan_id = listener->vlan_id;
+				   __entry->ipv4 = listener->ipv4;
+				   __entry->state = listener->listener_state;
+				   /*
+				    * Copy all four words: %pI6 prints the
+				    * whole array. Assumes loc_addr holds at
+				    * least 4 u32 - confirm.
+				    */
+				   memcpy(__get_dynamic_array(laddr),
+					  listener->loc_addr, 4 * sizeof(u32));
+			    ),
+		    TP_printk("iwdev=%p vlan=%d loc: %s",
+			      __entry->iwdev,
+			      __entry->vlan_id,
+			      __print_ip_addr(__get_dynamic_array(laddr),
+					      __entry->lport, __entry->ipv4)
+			    )
+);
+
+DEFINE_EVENT(listener_template, irdma_find_listener,
+	     TP_PROTO(struct irdma_cm_listener *listener),
+	     TP_ARGS(listener));
+
+DEFINE_EVENT(listener_template, irdma_del_multiple_qhash,
+	     TP_PROTO(struct irdma_cm_listener *listener),
+	     TP_ARGS(listener));
+
+/*
+ * Tracepoint irdma_negotiate_mpa_v2: records the cm_node pointer and its
+ * ord_size / ird_size values.
+ * NOTE(review): "MPVA2" in the format string looks like a transposition
+ * of "MPAv2"; it is emitted at runtime, so it is left untouched here.
+ */
+TRACE_EVENT(irdma_negotiate_mpa_v2,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node),
+ TP_STRUCT__entry(__field(struct irdma_cm_node *, cm_node)
+ __field(u16, ord_size)
+ __field(u16, ird_size)
+ ),
+ TP_fast_assign(__entry->cm_node = cm_node;
+ __entry->ord_size = cm_node->ord_size;
+ __entry->ird_size = cm_node->ird_size;
+ ),
+ TP_printk("MPVA2 Negotiated cm_node=%p ORD:[%d], IRD:[%d]",
+ __entry->cm_node,
+ __entry->ord_size,
+ __entry->ird_size
+ )
+);
+
+/*
+ * Event class tos_template: records the device pointer together with the
+ * tos and user_pri values passed by the caller.
+ */
+DECLARE_EVENT_CLASS(tos_template,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u8, tos)
+ __field(u8, user_pri)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ __entry->tos = tos;
+ __entry->user_pri = user_pri;
+ ),
+ TP_printk("iwdev=%p TOS:[%d] UP:[%d]",
+ __entry->iwdev,
+ __entry->tos,
+ __entry->user_pri
+ )
+);
+
+/* Events sharing the tos_template layout and format */
+DEFINE_EVENT(tos_template, irdma_listener_tos,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri));
+
+DEFINE_EVENT(tos_template, irdma_dcb_tos,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri));
+
+/*
+ * Event class qhash_template: records the device pointer, the listener's
+ * vlan id and local address/port, and the MAC address in dev_addr.
+ */
+DECLARE_EVENT_CLASS(qhash_template,
+		    TP_PROTO(struct irdma_device *iwdev,
+			     struct irdma_cm_listener *listener,
+			     char *dev_addr),
+		    TP_ARGS(iwdev, listener, dev_addr),
+		    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+				     __field(u16, lport)
+				     __field(u16, vlan_id)
+				     __field(bool, ipv4)
+				     __dynamic_array(u32, laddr, 4)
+				     /* ETH_ALEN bytes, as in irdma_addr_resolve */
+				     __dynamic_array(u8, mac, ETH_ALEN)
+			    ),
+		    TP_fast_assign(__entry->iwdev = iwdev;
+				   __entry->lport = listener->loc_port;
+				   __entry->vlan_id = listener->vlan_id;
+				   __entry->ipv4 = listener->ipv4;
+				   /*
+				    * Copy all four words: %pI6 prints the
+				    * whole array. Assumes loc_addr holds at
+				    * least 4 u32 - confirm.
+				    */
+				   memcpy(__get_dynamic_array(laddr),
+					  listener->loc_addr, 4 * sizeof(u32));
+				   ether_addr_copy(__get_dynamic_array(mac),
+						   dev_addr);
+			    ),
+		    TP_printk("iwdev=%p vlan=%d MAC=%pM loc: %s",
+			      __entry->iwdev,
+			      __entry->vlan_id,
+			      __get_dynamic_array(mac),
+			      __print_ip_addr(__get_dynamic_array(laddr),
+					      __entry->lport, __entry->ipv4)
+			    )
+);
+
+DEFINE_EVENT(qhash_template, irdma_add_mqh_6,
+	     TP_PROTO(struct irdma_device *iwdev,
+		      struct irdma_cm_listener *listener, char *dev_addr),
+	     TP_ARGS(iwdev, listener, dev_addr));
+
+DEFINE_EVENT(qhash_template, irdma_add_mqh_4,
+	     TP_PROTO(struct irdma_device *iwdev,
+		      struct irdma_cm_listener *listener, char *dev_addr),
+	     TP_ARGS(iwdev, listener, dev_addr));
+
+/*
+ * Tracepoint irdma_addr_resolve: records the device pointer and the
+ * ETH_ALEN-byte MAC address copied from dev_addr.
+ */
+TRACE_EVENT(irdma_addr_resolve,
+ TP_PROTO(struct irdma_device *iwdev, char *dev_addr),
+ TP_ARGS(iwdev, dev_addr),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __dynamic_array(u8, mac, ETH_ALEN)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ ether_addr_copy(__get_dynamic_array(mac), dev_addr);
+ ),
+ TP_printk("iwdev=%p MAC=%pM", __entry->iwdev,
+ __get_dynamic_array(mac)
+ )
+);
+
+/*
+ * Tracepoint irdma_send_cm_event: snapshots a cm_node (device, ref count,
+ * state, vlan id, accelerated flag, local and remote address/port)
+ * together with the cm_id, iw event type, status and caller address.
+ */
+TRACE_EVENT(irdma_send_cm_event,
+	    TP_PROTO(struct irdma_cm_node *cm_node, struct iw_cm_id *cm_id,
+		     enum iw_cm_event_type type, int status, void *caller),
+	    TP_ARGS(cm_node, cm_id, type, status, caller),
+	    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+			     __field(struct irdma_cm_node *, cm_node)
+			     __field(struct iw_cm_id *, cm_id)
+			     __field(u32, refcount)
+			     __field(u16, lport)
+			     __field(u16, rport)
+			     __field(enum irdma_cm_node_state, state)
+			     __field(bool, ipv4)
+			     __field(u16, vlan_id)
+			     __field(int, accel)
+			     __field(enum iw_cm_event_type, type)
+			     __field(int, status)
+			     __field(void *, caller)
+			     __dynamic_array(u32, laddr, 4)
+			     __dynamic_array(u32, raddr, 4)
+		    ),
+	    TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+			   __entry->cm_node = cm_node;
+			   __entry->cm_id = cm_id;
+			   __entry->refcount = atomic_read(&cm_node->ref_count);
+			   __entry->state = cm_node->state;
+			   __entry->lport = cm_node->loc_port;
+			   __entry->rport = cm_node->rem_port;
+			   __entry->ipv4 = cm_node->ipv4;
+			   __entry->vlan_id = cm_node->vlan_id;
+			   __entry->accel = cm_node->accelerated;
+			   __entry->type = type;
+			   __entry->status = status;
+			   __entry->caller = caller;
+			   /*
+			    * Copy all four words of each address: %pI6 prints
+			    * the whole array. Assumes loc_addr/rem_addr hold
+			    * at least 4 u32 - confirm.
+			    */
+			   memcpy(__get_dynamic_array(laddr),
+				  cm_node->loc_addr, 4 * sizeof(u32));
+			   memcpy(__get_dynamic_array(raddr),
+				  cm_node->rem_addr, 4 * sizeof(u32));
+		    ),
+	    TP_printk("iwdev=%p caller=%pS cm_id=%p node=%p refcnt=%d vlan_id=%d accel=%d state=%s event_type=%s status=%d loc: %s rem: %s",
+		      __entry->iwdev,
+		      __entry->caller,
+		      __entry->cm_id,
+		      __entry->cm_node,
+		      __entry->refcount,
+		      __entry->vlan_id,
+		      __entry->accel,
+		      parse_cm_state(__entry->state),
+		      parse_iw_event_type(__entry->type),
+		      __entry->status,
+		      __print_ip_addr(__get_dynamic_array(laddr),
+				      __entry->lport, __entry->ipv4),
+		      __print_ip_addr(__get_dynamic_array(raddr),
+				      __entry->rport, __entry->ipv4)
+		    )
+);
+
+/*
+ * Tracepoint irdma_send_cm_event_no_node: variant of the cm event trace
+ * that takes no cm_node; records only the cm_id, iw event type, status
+ * and caller address.
+ */
+TRACE_EVENT(irdma_send_cm_event_no_node,
+ TP_PROTO(struct iw_cm_id *cm_id, enum iw_cm_event_type type,
+ int status, void *caller),
+ TP_ARGS(cm_id, type, status, caller),
+ TP_STRUCT__entry(__field(struct iw_cm_id *, cm_id)
+ __field(enum iw_cm_event_type, type)
+ __field(int, status)
+ __field(void *, caller)
+ ),
+ TP_fast_assign(__entry->cm_id = cm_id;
+ __entry->type = type;
+ __entry->status = status;
+ __entry->caller = caller;
+ ),
+ TP_printk("cm_id=%p caller=%pS event_type=%s status=%d",
+ __entry->cm_id,
+ __entry->caller,
+ parse_iw_event_type(__entry->type),
+ __entry->status
+ )
+);
+
+/*
+ * Event class cm_node_template: snapshots a cm_node (device, ref count,
+ * state, vlan id, accelerated flag, local and remote address/port)
+ * together with an irdma CM event type and the caller address.
+ */
+DECLARE_EVENT_CLASS(cm_node_template,
+		    TP_PROTO(struct irdma_cm_node *cm_node,
+			     enum irdma_cm_event_type type, void *caller),
+		    TP_ARGS(cm_node, type, caller),
+		    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+				     __field(struct irdma_cm_node *, cm_node)
+				     __field(u32, refcount)
+				     __field(u16, lport)
+				     __field(u16, rport)
+				     __field(enum irdma_cm_node_state, state)
+				     __field(bool, ipv4)
+				     __field(u16, vlan_id)
+				     __field(int, accel)
+				     __field(enum irdma_cm_event_type, type)
+				     __field(void *, caller)
+				     __dynamic_array(u32, laddr, 4)
+				     __dynamic_array(u32, raddr, 4)
+			    ),
+		    TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+				   __entry->cm_node = cm_node;
+				   __entry->refcount = atomic_read(&cm_node->ref_count);
+				   __entry->state = cm_node->state;
+				   __entry->lport = cm_node->loc_port;
+				   __entry->rport = cm_node->rem_port;
+				   __entry->ipv4 = cm_node->ipv4;
+				   __entry->vlan_id = cm_node->vlan_id;
+				   __entry->accel = cm_node->accelerated;
+				   __entry->type = type;
+				   __entry->caller = caller;
+				   /*
+				    * Copy all four words of each address:
+				    * %pI6 prints the whole array. Assumes
+				    * loc_addr/rem_addr hold at least 4 u32 -
+				    * confirm.
+				    */
+				   memcpy(__get_dynamic_array(laddr),
+					  cm_node->loc_addr, 4 * sizeof(u32));
+				   memcpy(__get_dynamic_array(raddr),
+					  cm_node->rem_addr, 4 * sizeof(u32));
+			    ),
+		    TP_printk("iwdev=%p caller=%pS node=%p refcnt=%d vlan_id=%d accel=%d state=%s event_type=%s loc: %s rem: %s",
+			      __entry->iwdev,
+			      __entry->caller,
+			      __entry->cm_node,
+			      __entry->refcount,
+			      __entry->vlan_id,
+			      __entry->accel,
+			      parse_cm_state(__entry->state),
+			      parse_cm_event_type(__entry->type),
+			      __print_ip_addr(__get_dynamic_array(laddr),
+					      __entry->lport, __entry->ipv4),
+			      __print_ip_addr(__get_dynamic_array(raddr),
+					      __entry->rport, __entry->ipv4)
+			    )
+);
+
+/*
+ * Events below all reuse the cm_node_template class, so they share its
+ * prototype (cm_node, type, caller), record layout and print format.
+ */
+DEFINE_EVENT(cm_node_template, irdma_create_event,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_accept,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_connect,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_reject,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_find_node,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_send_reset,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_rem_ref_cm_node,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_cm_event_handler,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+/*
+ * open_err_template - event class for connection open failures.
+ *
+ * Records the owning device, the node pointer and its state, whether a reset
+ * was requested, and the caller.
+ *
+ * This is declared as a class only: it is consumed by DEFINE_EVENT() below,
+ * and TRACE_EVENT() would additionally create a tracepoint literally named
+ * "open_err_template".
+ */
+DECLARE_EVENT_CLASS(open_err_template,
+	    TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+	    TP_ARGS(cm_node, reset, caller),
+	    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+			     __field(struct irdma_cm_node *, cm_node)
+			     __field(enum irdma_cm_node_state, state)
+			     __field(bool, reset)
+			     __field(void *, caller)
+		    ),
+	    TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+			   __entry->cm_node = cm_node;
+			   __entry->state = cm_node->state;
+			   __entry->reset = reset;
+			   __entry->caller = caller;
+		    ),
+	    /* "node=%p" (was "node%p") to match the other key=value fields */
+	    TP_printk("iwdev=%p caller=%pS node=%p reset=%d state=%s",
+		      __entry->iwdev,
+		      __entry->caller,
+		      __entry->cm_node,
+		      __entry->reset,
+		      parse_cm_state(__entry->state)
+	    )
+);
+
+/*
+ * Tracepoints instantiated from open_err_template; both take
+ * (cm_node, reset, caller).
+ */
+DEFINE_EVENT(open_err_template, irdma_active_open_err,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller));
+
+DEFINE_EVENT(open_err_template, irdma_passive_open_err,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller));
+
+/*
+ * cm_node_ah_template - event class snapshotting a connection manager node
+ * together with its address handle pointer.
+ *
+ * The address arrays are declared as four u32 words, so the copy below
+ * transfers all four words; copying only 4 bytes would leave three words of
+ * the trace record uninitialised when a non-IPv4 address is printed.
+ */
+DECLARE_EVENT_CLASS(cm_node_ah_template,
+	    TP_PROTO(struct irdma_cm_node *cm_node),
+	    TP_ARGS(cm_node),
+	    TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+			     __field(struct irdma_cm_node *, cm_node)
+			     __field(struct irdma_sc_ah *, ah)
+			     __field(u32, refcount)
+			     __field(u16, lport)
+			     __field(u16, rport)
+			     __field(enum irdma_cm_node_state, state)
+			     __field(bool, ipv4)
+			     __field(u16, vlan_id)
+			     __field(int, accel)
+			     __dynamic_array(u32, laddr, 4)
+			     __dynamic_array(u32, raddr, 4)
+		    ),
+	    TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+			   __entry->cm_node = cm_node;
+			   __entry->ah = cm_node->ah;
+			   __entry->refcount = atomic_read(&cm_node->ref_count);
+			   __entry->lport = cm_node->loc_port;
+			   __entry->rport = cm_node->rem_port;
+			   __entry->state = cm_node->state;
+			   __entry->ipv4 = cm_node->ipv4;
+			   __entry->vlan_id = cm_node->vlan_id;
+			   __entry->accel = cm_node->accelerated;
+			   /* copy every word of the 4 x u32 arrays */
+			   memcpy(__get_dynamic_array(laddr),
+				  cm_node->loc_addr, 4 * sizeof(u32));
+			   memcpy(__get_dynamic_array(raddr),
+				  cm_node->rem_addr, 4 * sizeof(u32));
+		    ),
+	    TP_printk("iwdev=%p node=%p ah=%p refcnt=%d vlan_id=%d accel=%d state=%s loc: %s rem: %s",
+		      __entry->iwdev,
+		      __entry->cm_node,
+		      __entry->ah,
+		      __entry->refcount,
+		      __entry->vlan_id,
+		      __entry->accel,
+		      parse_cm_state(__entry->state),
+		      __print_ip_addr(__get_dynamic_array(laddr),
+				      __entry->lport, __entry->ipv4),
+		      __print_ip_addr(__get_dynamic_array(raddr),
+				      __entry->rport, __entry->ipv4)
+	    )
+);
+
+/*
+ * Tracepoints instantiated from cm_node_ah_template; both take only the
+ * cm_node.
+ */
+DEFINE_EVENT(cm_node_ah_template, irdma_cm_free_ah,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node));
+
+DEFINE_EVENT(cm_node_ah_template, irdma_create_ah,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node));
+
+#endif /* __TRACE_CM_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_cm
+#include <trace/define_trace.h>
--
1.8.3.1
^ permalink raw reply related
* [RFC v1 17/19] RDMA/irdma: Add ABI definitions
From: Shiraz Saleem @ 2019-02-15 17:11 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: Mustafa Ismail <mustafa.ismail@intel.com>
Add ABI definitions for irdma.
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
include/uapi/rdma/irdma-abi.h | 140 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 140 insertions(+)
create mode 100644 include/uapi/rdma/irdma-abi.h
diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h
new file mode 100644
index 0000000..5b0e2d5
--- /dev/null
+++ b/include/uapi/rdma/irdma-abi.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2006 - 2019 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef IRDMA_ABI_H
+#define IRDMA_ABI_H
+
+#include <linux/types.h>
+
+#define IRDMA_ABI_VER 6
+
+/*
+ * Memory registration kinds.
+ * NOTE(review): presumably the values carried in irdma_mem_reg_req.reg_type
+ * (its comment reads "Memory, QP or CQ") - confirm against the reg_user_mr
+ * path.
+ */
+enum irdma_memreg_type {
+ IW_MEMREG_TYPE_MEM = 0,
+ IW_MEMREG_TYPE_QP = 1,
+ IW_MEMREG_TYPE_CQ = 2,
+ IW_MEMREG_TYPE_RSVD = 3,
+ IW_MEMREG_TYPE_MW = 4,
+};
+
+/*
+ * Request copied in by irdma_alloc_ucontext(). userspace_ver is compared
+ * against IRDMA_ABI_VER there; the rsvd* members pad the struct to 8 bytes.
+ */
+struct irdma_alloc_ucontext_req {
+ __u32 rsvd32;
+ __u8 userspace_ver;
+ __u8 rsvd8[3];
+};
+
+/*
+ * Response written by irdma_alloc_ucontext() when the hardware revision is
+ * not IRDMA_GEN_1: kernel_ver echoes the requested version and hw_attrs is a
+ * copy of the device's hw_attrs.
+ * NOTE(review): struct irdma_hw_attrs is not defined in this header, which
+ * only includes <linux/types.h> - confirm the definition is reachable from
+ * userspace.
+ */
+struct irdma_alloc_ucontext_resp {
+ __u8 kernel_ver;
+ __u8 rsvd[7];
+ struct irdma_hw_attrs hw_attrs;
+};
+
+/*
+ * Response written by irdma_alloc_ucontext() when the hardware revision is
+ * IRDMA_GEN_1.
+ */
+struct i40iw_alloc_ucontext_resp {
+ __u32 max_pds; /* maximum pds allowed for this user process */
+ __u32 max_qps; /* maximum qps allowed for this user process */
+ __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */
+ __u8 kernel_ver;
+ __u8 reserved[3];
+};
+
+/*
+ * Response written by irdma_alloc_pd() for user contexts; pd_id is the
+ * allocated protection domain id.
+ */
+struct irdma_alloc_pd_resp {
+ __u32 pd_id;
+ __u8 rsvd[4];
+};
+
+/*
+ * Create-CQ request.
+ * NOTE(review): both members are presumably user virtual addresses of
+ * previously registered buffers - confirm against the create_cq path.
+ */
+struct irdma_create_cq_req {
+ __aligned_u64 user_cq_buf;
+ __aligned_u64 user_shadow_area;
+};
+
+/*
+ * Create-QP request copied in by irdma_create_qp(). user_wqe_bufs is used as
+ * the lookup key (matched against a pbl's user_base) in the context's QP
+ * registered-memory list; user_compl_ctx is stored as the QP completion
+ * context.
+ */
+struct irdma_create_qp_req {
+ __aligned_u64 user_wqe_bufs;
+ __aligned_u64 user_compl_ctx;
+};
+
+/*
+ * NOTE(review): presumably the i40iw-era layout of the create-QP request
+ * kept for compatibility - confirm where it is consumed.
+ */
+struct i40iw_create_qp_req {
+ __aligned_u64 user_wqe_buffers;
+ __aligned_u64 user_compl_ctx;
+
+ /* UDA QP PHB */
+ __aligned_u64 user_sq_phb; /* place for VA of the sq phb buff */
+ __aligned_u64 user_rq_phb; /* place for VA of the rq phb buff */
+};
+
+/*
+ * Memory registration request.
+ * NOTE(review): the *_pages members are presumably page counts of the
+ * respective queue buffers - confirm against the reg_user_mr path.
+ */
+struct irdma_mem_reg_req {
+ __u16 reg_type; /* Memory, QP or CQ */
+ __u16 cq_pages;
+ __u16 rq_pages;
+ __u16 sq_pages;
+};
+
+/* Create-CQ response: id and size of the created completion queue. */
+struct irdma_create_cq_resp {
+ __u32 cq_id;
+ __u32 cq_size;
+};
+
+/*
+ * NOTE(review): presumably the i40iw-era layout of the create-CQ response
+ * kept for compatibility - confirm where it is produced.
+ */
+struct i40iw_create_cq_resp {
+ __u32 cq_id;
+ __u32 cq_size;
+ __u32 mmap_db_index;
+ __u32 reserved;
+};
+
+/*
+ * Create-QP response; irdma_create_qp() returns this layout when the
+ * hardware revision is not IRDMA_GEN_1.
+ */
+struct irdma_create_qp_resp {
+ __u32 qp_id;
+ __u32 actual_sq_size;
+ __u32 actual_rq_size;
+ __u32 irdma_drv_opt;
+ __u16 push_idx;
+ __u8 lsmm;
+ __u8 rsvd;
+ __u32 qp_caps;
+};
+
+/*
+ * Create-QP response written by irdma_create_qp() when the hardware revision
+ * is IRDMA_GEN_1; that path fills qp_id, the actual queue sizes, push_idx and
+ * sets lsmm to 1.
+ */
+struct i40iw_create_qp_resp {
+ __u32 qp_id;
+ __u32 actual_sq_size;
+ __u32 actual_rq_size;
+ __u32 i40iw_drv_opt;
+ __u16 push_idx;
+ __u8 lsmm;
+ __u8 rsvd2;
+};
+
+/*
+ * Create-AH response.
+ * NOTE(review): with __u32 rsvd[4] the struct is 20 bytes, unlike the other
+ * responses here which pad to a multiple of 8 - confirm whether __u8 rsvd[4]
+ * was intended.
+ */
+struct irdma_create_ah_resp {
+ __u32 ah_id;
+ __u32 rsvd[4];
+};
+#endif /* IRDMA_ABI_H */
--
1.8.3.1
^ permalink raw reply related
* [RFC v1 12/19] RDMA/irdma: Implement device supported verb APIs
From: Shiraz Saleem @ 2019-02-15 17:10 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: Mustafa Ismail <mustafa.ismail@intel.com>
Implement device supported verb APIs. The supported APIs
vary based on the underlying transport the ibdev is
registered as (i.e. iWARP or RoCEv2).
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/hw/irdma/verbs.c | 4166 ++++++++++++++++++++++++++++++
drivers/infiniband/hw/irdma/verbs.h | 183 ++
include/uapi/rdma/rdma_user_ioctl_cmds.h | 1 +
3 files changed, 4350 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/verbs.c
create mode 100644 drivers/infiniband/hw/irdma/verbs.h
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
new file mode 100644
index 0000000..5e38bdc
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -0,0 +1,4166 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/random.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <linux/irq.h>
+#include <asm/byteorder.h>
+#include <net/ip.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include "main.h"
+
+/**
+ * irdma_query_device - report device attributes to the IB core
+ * @ibdev: device pointer from stack
+ * @props: attribute structure to fill in (zeroed here first)
+ * @udata: user data; must carry no input or output payload
+ *
+ * Return: 0 on success, -EINVAL when @udata has a non-zero inlen or outlen.
+ */
+static int irdma_query_device(struct ib_device *ibdev,
+			      struct ib_device_attr *props,
+			      struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_sc_dev *dev = &rf->sc_dev;
+	struct pci_dev *pdev = rf->pdev;
+
+	if (udata->inlen || udata->outlen)
+		return -EINVAL;
+
+	memset(props, 0, sizeof(*props));
+
+	/* identity */
+	ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
+	props->vendor_id = pdev->vendor;
+	props->vendor_part_id = pdev->device;
+	props->hw_ver = (u32)dev->pci_rev;
+	props->fw_ver = (u64)FW_MAJOR_VER(dev) << 32 |
+			FW_MINOR_VER(dev) << 16;
+	props->device_cap_flags = iwdev->device_cap_flags;
+
+	/* remaining resource counts: configured maximum minus in-use */
+	props->max_qp = rf->max_qp - rf->used_qps;
+	props->max_cq = rf->max_cq - rf->used_cqs;
+	props->max_mr = rf->max_mr - rf->used_mrs;
+	props->max_mw = props->max_mr;
+	props->max_pd = rf->max_pd - rf->used_pds;
+	props->max_cqe = rf->max_cqe;
+	props->max_ah = rf->max_ah;
+
+	/* limits taken from the hardware attribute table */
+	props->max_mr_size = dev->hw_attrs.max_hw_outbound_msg_size;
+	props->max_qp_wr = dev->hw_attrs.max_qp_wr;
+	props->max_send_sge = dev->hw_attrs.max_hw_wq_frags;
+	props->max_recv_sge = dev->hw_attrs.max_hw_wq_frags;
+	props->max_sge_rd = dev->hw_attrs.max_hw_read_sges;
+	props->max_qp_rd_atom = dev->hw_attrs.max_hw_ird;
+	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
+
+	/* fixed capabilities */
+	props->atomic_cap = IB_ATOMIC_NONE;
+	props->max_map_per_fmr = 1;
+	props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR;
+
+	/* multicast */
+	props->max_mcast_grp = rf->max_mcg;
+	props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
+	props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX;
+
+	return 0;
+}
+
+/**
+ * irdma_get_eth_speed_and_width - map a netdev link speed to IB speed/width
+ * @link_speed: netdev phy link speed
+ * @active_speed: returned IB port speed
+ * @active_width: returned IB port width
+ *
+ * Speeds are bucketed by upper bound (1G, 10G, 20G, 25G, 40G); anything above
+ * 40G is reported as 4X EDR.
+ */
+static void irdma_get_eth_speed_and_width(u32 link_speed,
+					  u8 *active_speed,
+					  u8 *active_width)
+{
+	u8 width;
+	u8 speed;
+
+	if (link_speed <= SPEED_1000) {
+		width = IB_WIDTH_1X;
+		speed = IB_SPEED_SDR;
+	} else if (link_speed <= SPEED_10000) {
+		width = IB_WIDTH_1X;
+		speed = IB_SPEED_FDR10;
+	} else if (link_speed <= SPEED_20000) {
+		width = IB_WIDTH_4X;
+		speed = IB_SPEED_DDR;
+	} else if (link_speed <= SPEED_25000) {
+		width = IB_WIDTH_1X;
+		speed = IB_SPEED_EDR;
+	} else if (link_speed <= SPEED_40000) {
+		width = IB_WIDTH_4X;
+		speed = IB_SPEED_FDR10;
+	} else {
+		width = IB_WIDTH_4X;
+		speed = IB_SPEED_EDR;
+	}
+
+	*active_width = width;
+	*active_speed = speed;
+}
+
+/**
+ * irdma_query_port - report port attributes to the IB core
+ * @ibdev: device pointer from stack
+ * @port: port number for query
+ * @props: port attribute structure to fill in (already zeroed by the caller)
+ *
+ * Link state is derived from the netdev carrier/running flags. The speed and
+ * width are derived from a fixed SPEED_100000 rather than the negotiated
+ * link speed.
+ *
+ * Return: always 0.
+ */
+static int irdma_query_port(struct ib_device *ibdev,
+			    u8 port,
+			    struct ib_port_attr *props)
+{
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	struct net_device *netdev = iwdev->netdev;
+	bool link_up = netif_carrier_ok(netdev) && netif_running(netdev);
+
+	props->max_mtu = IB_MTU_4096;
+	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+
+	props->lid = 1;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+
+	props->state = link_up ? IB_PORT_ACTIVE : IB_PORT_DOWN;
+	props->phys_state = link_up ? 5 : 3;
+
+	irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed,
+				      &props->active_width);
+
+	if (!rdma_protocol_roce(ibdev, 1)) {
+		props->gid_tbl_len = 1;
+	} else {
+		props->gid_tbl_len = 32;
+		props->ip_gids = true;
+	}
+
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+	props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_REINIT_SUP;
+	props->max_msg_sz = iwdev->rf->sc_dev.hw_attrs.max_hw_outbound_msg_size;
+
+	return 0;
+}
+
+/**
+ * irdma_alloc_ucontext - Allocate the user context data structure
+ * @ibdev: device pointer from stack
+ * @udata: user data
+ *
+ * This keeps track of all objects associated with a particular
+ * user-mode client.
+ *
+ * The userspace ABI version is validated before anything is allocated:
+ * versions newer than IRDMA_ABI_VER are always rejected, and on hardware
+ * other than IRDMA_GEN_1 older versions are rejected as well.
+ *
+ * Return: the embedded ib_ucontext on success, or ERR_PTR(-EINVAL) for a bad
+ * request or version, ERR_PTR(-ENOMEM) on allocation failure, or
+ * ERR_PTR(-EFAULT) when the response cannot be copied to userspace.
+ */
+static struct ib_ucontext *irdma_alloc_ucontext(struct ib_device *ibdev,
+						struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+	struct irdma_alloc_ucontext_req req;
+	struct irdma_alloc_ucontext_resp uresp = {};
+	struct i40iw_alloc_ucontext_resp uresp_gen1 = {};
+	struct irdma_ucontext *ucontext;
+	bool gen1 = dev->hw_attrs.hw_rev == IRDMA_GEN_1;
+	int err;
+
+	if (ib_copy_from_udata(&req, udata, sizeof(req)))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * Check the version up front so that the failure path has nothing to
+	 * free; checking after the allocation leaked the context.
+	 */
+	if (req.userspace_ver > IRDMA_ABI_VER ||
+	    (!gen1 && req.userspace_ver < IRDMA_ABI_VER)) {
+		irdma_dev_err(dev,
+			      "Invalid userspace driver version detected. Detected version %d, should be %d\n",
+			      req.userspace_ver, IRDMA_ABI_VER);
+		return ERR_PTR(-EINVAL);
+	}
+
+	ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
+	if (!ucontext)
+		return ERR_PTR(-ENOMEM);
+
+	ucontext->iwdev = iwdev;
+	ucontext->abi_ver = req.userspace_ver;
+
+	if (gen1) {
+		uresp_gen1.max_qps = iwdev->rf->max_qp;
+		uresp_gen1.max_pds = dev->hw_attrs.max_hw_pds;
+		uresp_gen1.wq_size = dev->hw_attrs.max_qp_wr * 2;
+		uresp_gen1.kernel_ver = req.userspace_ver;
+		err = ib_copy_to_udata(udata, &uresp_gen1, sizeof(uresp_gen1));
+	} else {
+		uresp.kernel_ver = req.userspace_ver;
+		uresp.hw_attrs = dev->hw_attrs;
+		err = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	}
+	if (err) {
+		kfree(ucontext);
+		return ERR_PTR(-EFAULT);
+	}
+
+	INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
+	spin_lock_init(&ucontext->cq_reg_mem_list_lock);
+	INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
+	spin_lock_init(&ucontext->qp_reg_mem_list_lock);
+
+	return &ucontext->ibucontext;
+}
+
+/**
+ * irdma_dealloc_ucontext - free the user context data structure
+ * @context: user context created during alloc
+ *
+ * Return: always 0.
+ */
+static int irdma_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(to_ucontext(context));
+
+	return 0;
+}
+
+/**
+ * irdma_disassociate_ucontext - Disassociate user context
+ * @context: ib user context
+ *
+ * Currently performs no work; the body is empty.
+ */
+static void irdma_disassociate_ucontext(struct ib_ucontext *context)
+{
+}
+
+/**
+ * irdma_mmap - map a doorbell or push page into user space
+ * @context: context created during alloc
+ * @vma: kernel info for user memory map
+ *
+ * A zero page offset selects the doorbell page, which is mapped non-cached.
+ * Any other offset is rebased onto the PF or VF first push page index and
+ * mapped non-cached or write-combined depending on the parity of its
+ * distance from the push region start. @vma->vm_pgoff is rewritten in place.
+ *
+ * Return: result of rdma_user_mmap_io().
+ */
+static int irdma_mmap(struct ib_ucontext *context,
+		      struct vm_area_struct *vma)
+{
+	struct irdma_ucontext *ucontext = to_ucontext(context);
+	struct irdma_pci_f *rf = ucontext->iwdev->rf;
+	u64 db_addr_offset = rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
+	u64 db_pgoff = db_addr_offset >> PAGE_SHIFT;
+	u64 push_offset;
+	unsigned long pfn;
+
+	/* non-zero offsets are rebased onto the first push page */
+	if (rf->sc_dev.is_pf) {
+		push_offset = IRDMA_PUSH_OFFSET;
+		if (vma->vm_pgoff)
+			vma->vm_pgoff += IRDMA_PF_FIRST_PUSH_PAGE_INDEX - 1;
+	} else {
+		push_offset = IRDMA_VF_PUSH_OFFSET;
+		if (vma->vm_pgoff)
+			vma->vm_pgoff += IRDMA_VF_FIRST_PUSH_PAGE_INDEX - 1;
+	}
+
+	vma->vm_pgoff += db_pgoff;
+
+	if (vma->vm_pgoff == db_pgoff) {
+		/* doorbell page */
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_private_data = ucontext;
+	} else if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2) {
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	} else {
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	pfn = vma->vm_pgoff + (pci_resource_start(rf->pdev, 0) >> PAGE_SHIFT);
+
+	return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
+				 vma->vm_page_prot);
+}
+
+/**
+ * irdma_alloc_push_page - request a push page for a qp through the CQP
+ * @rf: RDMA PCI function
+ * @qp: hardware control qp
+ *
+ * Does nothing when @qp already has a valid push index or when no CQP
+ * request can be obtained. On success the index reported by the completion
+ * is stored in @qp->push_idx; on failure only a debug message is emitted.
+ */
+static void irdma_alloc_push_page(struct irdma_pci_f *rf,
+				  struct irdma_sc_qp *qp)
+{
+	struct irdma_cqp_request *req;
+	struct cqp_cmds_info *info;
+	enum irdma_status_code rc;
+
+	if (qp->push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX)
+		return;
+
+	req = irdma_get_cqp_request(&rf->cqp, true);
+	if (!req)
+		return;
+
+	atomic_inc(&req->refcount);
+
+	info = &req->info;
+	info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE;
+	info->post_sq = 1;
+	info->in.u.manage_push_page.cqp = &rf->cqp.sc_cqp;
+	info->in.u.manage_push_page.scratch = (uintptr_t)req;
+	info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
+	info->in.u.manage_push_page.info.free_page = 0;
+
+	rc = irdma_handle_cqp_op(rf, req);
+	if (rc)
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR, "CQP-OP Push page fail");
+	else
+		qp->push_idx = req->compl_info.op_ret_val;
+
+	irdma_put_cqp_request(&rf->cqp, req);
+}
+
+/**
+ * irdma_alloc_pd - allocate protection domain
+ * @pd: PD pointer
+ * @context: user context created during alloc, NULL for kernel callers
+ * @udata: user data
+ *
+ * Reserves a PD id, initialises the hardware PD and, for user contexts,
+ * copies the id back to userspace.
+ *
+ * Return: 0 on success, -ENODEV while the device is closing, the error from
+ * irdma_alloc_rsrc(), or -EFAULT when the response copy fails (the PD id is
+ * released in that case).
+ */
+static int irdma_alloc_pd(struct ib_pd *pd,
+			  struct ib_ucontext *context,
+			  struct ib_udata *udata)
+{
+	struct irdma_pd *iwpd = to_iwpd(pd);
+	struct irdma_device *iwdev = to_iwdev(pd->device);
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_sc_dev *dev = &rf->sc_dev;
+	struct irdma_sc_pd *sc_pd = &iwpd->sc_pd;
+	struct irdma_alloc_pd_resp uresp = {};
+	u32 pd_id = 0;
+	int err;
+
+	if (iwdev->closing)
+		return -ENODEV;
+
+	err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id,
+			       &rf->next_pd);
+	if (err)
+		return err;
+
+	if (!context) {
+		/* kernel PD: no ABI version applies */
+		dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, -1);
+	} else {
+		struct irdma_ucontext *ucontext = to_ucontext(context);
+
+		dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
+		uresp.pd_id = pd_id;
+		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+			irdma_free_rsrc(rf, rf->allocated_pds, pd_id);
+			return -EFAULT;
+		}
+	}
+
+	irdma_add_pdusecount(iwpd);
+
+	return 0;
+}
+
+/**
+ * irdma_dealloc_pd - drop the use count on a pd
+ * @ibpd: ptr of pd to be deallocated
+ */
+static void irdma_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct irdma_device *dev = to_iwdev(ibpd->device);
+	struct irdma_pd *pd = to_iwpd(ibpd);
+
+	irdma_rem_pdusecount(pd, dev);
+}
+
+/**
+ * irdma_get_pbl - find and detach the pbl registered at a virtual address
+ * @va: user virtual address
+ * @pbl_list: pbl list to search in (QP's or CQ's)
+ *
+ * The first entry whose user_base equals @va is unlinked from @pbl_list and
+ * has its on_list flag cleared.
+ *
+ * Return: the detached pbl, or NULL when no entry matches.
+ */
+static struct irdma_pbl *irdma_get_pbl(unsigned long va,
+				       struct list_head *pbl_list)
+{
+	struct irdma_pbl *entry;
+	struct irdma_pbl *match = NULL;
+
+	list_for_each_entry(entry, pbl_list, list) {
+		if (entry->user_base != va)
+			continue;
+		match = entry;
+		break;
+	}
+
+	if (match) {
+		list_del(&match->list);
+		match->on_list = false;
+	}
+
+	return match;
+}
+
+/**
+ * irdma_clean_cqes - invoke the cq's clean op for a qp
+ * @iwqp: qp ptr (user or kernel)
+ * @iwcq: cq ptr
+ */
+static void irdma_clean_cqes(struct irdma_qp *iwqp,
+			     struct irdma_cq *iwcq)
+{
+	iwcq->sc_cq.cq_uk.ops.iw_cq_clean(&iwqp->sc_qp.qp_uk,
+					  &iwcq->sc_cq.cq_uk);
+}
+
+/**
+ * irdma_destroy_qp - destroy qp
+ * @ibqp: qp's ib pointer also to get to device's qp address
+ *
+ * Marks the qp destroyed, moves it to the error state when it is between
+ * INIT and RTS, cleans its CQ entries for kernel-mode qps and drops a
+ * reference.
+ *
+ * Return: 0 on success, -EBUSY while multicast references remain.
+ */
+static int irdma_destroy_qp(struct ib_qp *ibqp)
+{
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+
+	if (atomic_read(&iwqp->mcast_ref_cnt))
+		return -EBUSY;
+
+	iwqp->destroyed = 1;
+
+	if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
+		irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 0, 0, 0);
+
+	if (!iwqp->user_mode && iwqp->iwscq) {
+		irdma_clean_cqes(iwqp, iwqp->iwscq);
+		/* a shared send/receive cq only needs cleaning once */
+		if (iwqp->iwrcq != iwqp->iwscq)
+			irdma_clean_cqes(iwqp, iwqp->iwrcq);
+	}
+
+	irdma_rem_ref(&iwqp->ibqp);
+
+	return 0;
+}
+
+/**
+ * irdma_setup_virt_qp - setup for allocation of virtual qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr
+ * @init_info: initialize info to return
+ *
+ * Fills the queue and shadow area addresses of @init_info from the pbl
+ * attached to @iwqp. When the pbl was allocated, the addresses are pbl
+ * indices and virtual_map is set; otherwise they are the direct addresses.
+ *
+ * Return: 0 on success, -ENODATA when @iwqp has no pbl attached (the caller
+ * only attaches one when userspace supplied a WQE buffer address).
+ */
+static int irdma_setup_virt_qp(struct irdma_device *iwdev,
+			       struct irdma_qp *iwqp,
+			       struct irdma_qp_init_info *init_info)
+{
+	struct irdma_pbl *iwpbl = iwqp->iwpbl;
+	struct irdma_qp_mr *qpmr;
+
+	/* without a registered buffer there is nothing to describe */
+	if (!iwpbl)
+		return -ENODATA;
+
+	qpmr = &iwpbl->qp_mr;
+	iwqp->page = qpmr->sq_page;
+	init_info->shadow_area_pa = qpmr->shadow;
+	if (iwpbl->pbl_allocated) {
+		init_info->virtual_map = true;
+		init_info->sq_pa = qpmr->sq_pbl.idx;
+		init_info->rq_pa = qpmr->rq_pbl.idx;
+	} else {
+		init_info->sq_pa = qpmr->sq_pbl.addr;
+		init_info->rq_pa = qpmr->rq_pbl.addr;
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_setup_kmode_qp - setup initialization for kernel mode qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ *
+ * Computes the SQ/RQ depths, allocates one buffer holding the SQ work
+ * request tracking array followed by the RQ wrid array, and one DMA region
+ * laid out as SQ, RQ, then shadow area.
+ *
+ * Return: 0 on success, -ENOMEM when a depth cannot be computed or an
+ * allocation fails. On failure no pointer into freed memory is left behind
+ * in @iwqp or @info.
+ */
+static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
+				struct irdma_qp *iwqp,
+				struct irdma_qp_init_info *info)
+{
+	struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem;
+	u32 sqdepth, rqdepth;
+	u8 sqshift, rqshift;
+	u32 size;
+	enum irdma_status_code status;
+	struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+
+	irdma_get_wqe_shift(&iwdev->rf->sc_dev.hw_attrs,
+			    ukinfo->max_sq_frag_cnt + 1,
+			    ukinfo->max_inline_data,
+			    &sqshift);
+	status = irdma_get_sqdepth(&iwdev->rf->sc_dev.hw_attrs,
+				   ukinfo->sq_size,
+				   sqshift,
+				   &sqdepth);
+	if (status)
+		return -ENOMEM;
+
+	irdma_get_wqe_shift(&iwdev->rf->sc_dev.hw_attrs,
+			    ukinfo->max_rq_frag_cnt,
+			    0,
+			    &rqshift);
+	status = irdma_get_rqdepth(&iwdev->rf->sc_dev.hw_attrs,
+				   ukinfo->rq_size,
+				   rqshift,
+				   &rqdepth);
+	if (status)
+		return -ENOMEM;
+
+	/* SQ tracking entries followed by one u64 wrid per RQ entry */
+	size = sqdepth * sizeof(struct irdma_sq_uk_wr_trk_info) +
+	       (rqdepth << 3);
+	iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
+	ukinfo->sq_wrtrk_array = (struct irdma_sq_uk_wr_trk_info *)
+				 iwqp->kqp.wrid_mem;
+	if (!ukinfo->sq_wrtrk_array)
+		return -ENOMEM;
+
+	ukinfo->rq_wrid_array = (u64 *)&ukinfo->sq_wrtrk_array[sqdepth];
+	size = (sqdepth + rqdepth) * IRDMA_QP_WQE_MIN_SIZE;
+	size += (IRDMA_SHADOW_AREA_SIZE << 3);
+
+	status = irdma_allocate_dma_mem(iwdev->rf->sc_dev.hw, mem, size, 256);
+	if (status) {
+		/*
+		 * Clear every alias of the freed buffer, including
+		 * kqp.wrid_mem, so a later cleanup cannot free it again.
+		 */
+		kfree(iwqp->kqp.wrid_mem);
+		iwqp->kqp.wrid_mem = NULL;
+		ukinfo->sq_wrtrk_array = NULL;
+		ukinfo->rq_wrid_array = NULL;
+		return -ENOMEM;
+	}
+
+	ukinfo->sq = mem->va;
+	info->sq_pa = mem->pa;
+	ukinfo->rq = &ukinfo->sq[sqdepth];
+	info->rq_pa = info->sq_pa + (sqdepth * IRDMA_QP_WQE_MIN_SIZE);
+	ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
+	info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE);
+	ukinfo->sq_size = sqdepth >> sqshift;
+	ukinfo->rq_size = rqdepth >> rqshift;
+	ukinfo->qp_id = iwqp->ibqp.qp_num;
+
+	return 0;
+}
+
+/**
+ * irdma_roce_mtu - clamp an MTU to a supported path MTU value
+ * @mtu: MTU
+ *
+ * Return: the largest of 4096, 2048, 1024 and 512 that is strictly smaller
+ * than @mtu, or 256 when @mtu is 512 or less.
+ */
+static u32 irdma_roce_mtu(u32 mtu)
+{
+	static const u32 path_mtu[] = { 4096, 2048, 1024, 512 };
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		if (mtu > path_mtu[i])
+			return path_mtu[i];
+	}
+
+	return 256;
+}
+
+/**
+ * irdma_create_qp - create qp
+ * @ibpd: ptr of pd
+ * @init_attr: attributes for qp
+ * @udata: user data for create qp
+ */
+static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct irdma_pd *iwpd = to_iwpd(ibpd);
+ struct irdma_device *iwdev = to_iwdev(ibpd->device);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_qp *iwqp;
+ struct irdma_ucontext *ucontext;
+ struct irdma_create_qp_req req;
+ struct irdma_create_qp_resp uresp = {};
+ struct i40iw_create_qp_resp uresp_gen1 = {};
+ u32 qp_num = 0;
+ void *mem;
+ enum irdma_status_code ret;
+ int err_code = 0;
+ int sq_size;
+ int rq_size;
+ struct irdma_sc_qp *qp;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_qp_init_info init_info = {};
+ struct irdma_create_qp_info *qp_info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_iwarp_offload_info *iwarp_info;
+ struct irdma_roce_offload_info *roce_info;
+ struct irdma_udp_offload_info *udp_info;
+ unsigned long flags;
+
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ if (init_attr->cap.max_inline_data > dev->hw_attrs.max_hw_inline)
+ init_attr->cap.max_inline_data = dev->hw_attrs.max_hw_inline;
+
+ if (init_attr->cap.max_send_sge > dev->hw_attrs.max_hw_wq_frags)
+ init_attr->cap.max_send_sge = dev->hw_attrs.max_hw_wq_frags;
+
+ if (init_attr->cap.max_recv_sge > dev->hw_attrs.max_hw_wq_frags)
+ init_attr->cap.max_recv_sge = dev->hw_attrs.max_hw_wq_frags;
+
+ sq_size = init_attr->cap.max_send_wr;
+ rq_size = init_attr->cap.max_recv_wr;
+
+ init_info.vsi = &iwdev->vsi;
+ init_info.qp_uk_init_info.hw_attrs = &dev->hw_attrs;
+ init_info.qp_uk_init_info.sq_size = sq_size;
+ init_info.qp_uk_init_info.rq_size = rq_size;
+ init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
+ init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+ init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
+
+ mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ iwqp = (struct irdma_qp *)mem;
+ iwqp->allocated_buf = mem;
+ qp = &iwqp->sc_qp;
+ qp->back_qp = (void *)iwqp;
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+
+ if (irdma_allocate_dma_mem(dev->hw,
+ &iwqp->q2_ctx_mem,
+ IRDMA_Q2_BUF_SIZE + IRDMA_QP_CTX_SIZE,
+ 256)) {
+ err_code = -ENOMEM;
+ goto error;
+ }
+
+ init_info.q2 = iwqp->q2_ctx_mem.va;
+ init_info.q2_pa = iwqp->q2_ctx_mem.pa;
+ init_info.host_ctx = (void *)init_info.q2 + IRDMA_Q2_BUF_SIZE;
+ init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE;
+
+ if (init_attr->qp_type == IB_QPT_GSI && rf->sc_dev.is_pf)
+ qp_num = 1;
+ else
+ err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
+ &qp_num, &rf->next_qp);
+ if (err_code)
+ goto error;
+
+ iwqp->iwdev = iwdev;
+ iwqp->iwpd = iwpd;
+ if (init_attr->qp_type == IB_QPT_GSI && !rf->sc_dev.is_pf)
+ iwqp->ibqp.qp_num = 1;
+ else
+ iwqp->ibqp.qp_num = qp_num;
+
+ qp = &iwqp->sc_qp;
+ iwqp->iwscq = to_iwcq(init_attr->send_cq);
+ iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
+ iwqp->host_ctx.va = init_info.host_ctx;
+ iwqp->host_ctx.pa = init_info.host_ctx_pa;
+ iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE;
+
+ init_info.pd = &iwpd->sc_pd;
+ init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
+ if (!rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1))
+ init_info.qp_uk_init_info.first_sq_wq = 1;
+ iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
+ init_waitqueue_head(&iwqp->waitq);
+ init_waitqueue_head(&iwqp->mod_qp_waitq);
+
+ if (rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1)) {
+ if (init_attr->qp_type != IB_QPT_RC &&
+ init_attr->qp_type != IB_QPT_UD &&
+ init_attr->qp_type != IB_QPT_GSI) {
+ err_code = -EINVAL;
+ goto error;
+ }
+ } else {
+ if (init_attr->qp_type != IB_QPT_RC) {
+ err_code = -EINVAL;
+ goto error;
+ }
+ }
+
+ if (iwdev->push_mode)
+ irdma_alloc_push_page(rf, qp);
+
+ if (udata) {
+ err_code = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (err_code) {
+ irdma_debug(dev, IRDMA_DEBUG_ERR,
+ "ib_copy_from_data fail\n");
+ goto error;
+ }
+
+ iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
+ iwqp->user_mode = 1;
+ ucontext = to_ucontext(ibpd->uobject->context);
+ if (req.user_wqe_bufs) {
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs,
+ &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ if (!iwqp->iwpbl) {
+ err_code = -ENODATA;
+ irdma_debug(dev, IRDMA_DEBUG_ERR,
+ "no pbl info\n");
+ goto error;
+ }
+ }
+ err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info);
+ } else {
+ err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info);
+ }
+
+ if (err_code) {
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "setup qp failed\n");
+ goto error;
+ }
+
+ if (rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1)) {
+ if (init_attr->qp_type == IB_QPT_RC) {
+ init_info.type = IRDMA_QP_TYPE_ROCE_RC;
+ init_info.qp_uk_init_info.qp_caps =
+ IRDMA_SEND_WITH_IMM | IRDMA_WRITE_WITH_IMM | IRDMA_ROCE;
+ } else {
+ init_info.type = IRDMA_QP_TYPE_ROCE_UD;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM | IRDMA_ROCE;
+ }
+ } else {
+ init_info.type = IRDMA_QP_TYPE_IWARP;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_WRITE_WITH_IMM;
+ }
+
+ ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
+ if (ret) {
+ err_code = -EPROTO;
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "qp_init fail\n");
+ goto error;
+ }
+
+ ctx_info = &iwqp->ctx_info;
+ if (rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1)) {
+ iwqp->ctx_info.roce_info = &iwqp->roce_info;
+ iwqp->ctx_info.udp_info = &iwqp->udp_info;
+ udp_info = &iwqp->udp_info;
+ udp_info->snd_mss = irdma_roce_mtu(iwdev->vsi.mtu);
+ udp_info->cwnd = 0x400;
+ udp_info->src_port = 0xc000;
+ udp_info->dst_port = ROCE_V2_UDP_DPORT;
+ roce_info = &iwqp->roce_info;
+ ether_addr_copy(roce_info->mac_addr, iwdev->netdev->dev_addr);
+
+ if (init_attr->qp_type == IB_QPT_GSI && !rf->sc_dev.is_pf)
+ roce_info->is_qp1 = true;
+ roce_info->rd_en = true;
+ roce_info->wr_rdresp_en = true;
+ roce_info->dctcp_en = iwdev->dctcp_en;
+ roce_info->ecn_en = iwdev->ecn_en;
+ roce_info->dcqcn_en = iwdev->roce_dcqcn_en;
+ roce_info->timely_en = iwdev->roce_timely_en;
+
+ roce_info->ack_credits = 0x1E;
+ roce_info->ird_size = IRDMA_MAX_ENCODED_IRD_SIZE;
+ roce_info->ord_size = dev->hw_attrs.max_hw_ord;
+
+ if (!iwqp->user_mode) {
+ roce_info->priv_mode_en = true;
+ roce_info->fast_reg_en = true;
+ roce_info->udprivcq_en = true;
+ }
+ roce_info->roce_tver = 0;
+ } else {
+ iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
+ iwarp_info = &iwqp->iwarp_info;
+ ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr);
+ iwarp_info->rd_en = true;
+ iwarp_info->wr_rdresp_en = true;
+ iwarp_info->ib_rd_en = true;
+ if (!iwqp->user_mode) {
+ iwarp_info->priv_mode_en = true;
+ iwarp_info->fast_reg_en = true;
+ }
+ iwarp_info->ddp_ver = 1;
+ iwarp_info->rdmap_ver = 1;
+ ctx_info->iwarp_info_valid = true;
+ }
+
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ ctx_info->push_mode_en = false;
+ } else {
+ ctx_info->push_mode_en = true;
+ ctx_info->push_idx = qp->push_idx;
+ }
+
+ if (rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1)) {
+ ret =
+ dev->iw_priv_qp_ops->qp_setctx_roce(&iwqp->sc_qp,
+ iwqp->host_ctx.va,
+ ctx_info);
+ } else {
+ ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+ iwqp->host_ctx.va,
+ ctx_info);
+ ctx_info->iwarp_info_valid = false;
+ }
+
+ cqp_request = irdma_get_cqp_request(iwcqp, true);
+ if (!cqp_request) {
+ err_code = -ENOMEM;
+ goto error;
+ }
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+ memset(qp_info, 0, sizeof(*qp_info));
+ qp_info->mac_valid = true;
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE;
+
+ cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+ ret = irdma_handle_cqp_op(rf, cqp_request);
+ if (ret) {
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP-OP QP create fail");
+ err_code = -ENOMEM;
+ goto error;
+ }
+
+ irdma_add_ref(&iwqp->ibqp);
+ spin_lock_init(&iwqp->lock);
+ spin_lock_init(&iwqp->sc_qp.pfpdu.lock);
+ iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
+ rf->qp_table[qp_num] = iwqp;
+ irdma_add_pdusecount(iwqp->iwpd);
+ irdma_add_devusecount(iwdev);
+ if (udata) {
+ if (iwdev->rf->sc_dev.hw_attrs.hw_rev == IRDMA_GEN_1) {
+ uresp_gen1.lsmm = 1;
+ uresp_gen1.actual_sq_size = sq_size;
+ uresp_gen1.actual_rq_size = rq_size;
+ uresp_gen1.qp_id = qp_num;
+ uresp_gen1.push_idx = qp->push_idx;
+ uresp_gen1.lsmm = 1;
+ err_code = ib_copy_to_udata(udata, &uresp_gen1, sizeof(uresp_gen1));
+ } else {
+ if (rdma_protocol_iwarp(&iwdev->iwibdev->ibdev, 1))
+ uresp.lsmm = 1;
+ uresp.actual_sq_size = sq_size;
+ uresp.actual_rq_size = rq_size;
+ uresp.qp_id = qp_num;
+ uresp.push_idx = qp->push_idx;
+ uresp.qp_caps = qp->qp_uk.qp_caps;
+
+ err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ }
+ if (err_code) {
+ irdma_debug(dev, IRDMA_DEBUG_ERR, "copy_to_udata failed\n");
+ irdma_destroy_qp(&iwqp->ibqp);
+ return ERR_PTR(err_code);
+ }
+ }
+ init_completion(&iwqp->sq_drained);
+ init_completion(&iwqp->rq_drained);
+ return &iwqp->ibqp;
+
+error:
+ irdma_free_qp_rsrc(iwdev, iwqp, qp_num);
+
+ return ERR_PTR(err_code);
+}
+
+/**
+ * irdma_query_qp - report the attributes currently tracked for a qp
+ * @ibqp: qp pointer
+ * @attr: filled with the qp state, queue capabilities and qkey
+ * @attr_mask: Not used
+ * @init_attr: filled with the event handler, context, cqs, srq and
+ *	       the same capabilities that are stored in @attr
+ *
+ * Always returns 0.
+ */
+static int irdma_query_qp(struct ib_qp *ibqp,
+			  struct ib_qp_attr *attr,
+			  int attr_mask,
+			  struct ib_qp_init_attr *init_attr)
+{
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+	struct irdma_sc_qp *sc_qp = &iwqp->sc_qp;
+	struct ib_qp_cap *cap = &attr->cap;
+
+	/* Queue capabilities come from the user-kernel shared qp info. */
+	cap->max_send_wr = sc_qp->qp_uk.sq_size - 1;
+	cap->max_recv_wr = sc_qp->qp_uk.rq_size - 1;
+	cap->max_inline_data = sc_qp->qp_uk.max_inline_data;
+	cap->max_send_sge = sc_qp->qp_uk.max_sq_frag_cnt;
+	cap->max_recv_sge = sc_qp->qp_uk.max_rq_frag_cnt;
+
+	attr->qp_state = iwqp->ibqp_state;
+	attr->cur_qp_state = iwqp->ibqp_state;
+	attr->qp_access_flags = 0;
+	attr->qkey = iwqp->roce_info.qkey;
+
+	init_attr->cap = *cap;
+	init_attr->event_handler = iwqp->ibqp.event_handler;
+	init_attr->qp_context = iwqp->ibqp.qp_context;
+	init_attr->send_cq = iwqp->ibqp.send_cq;
+	init_attr->recv_cq = iwqp->ibqp.recv_cq;
+	init_attr->srq = iwqp->ibqp.srq;
+
+	return 0;
+}
+
+/**
+ * irdma_query_pkey - Query partition key
+ * @ibdev: device pointer from stack
+ * @port: port number (not examined)
+ * @index: index of pkey (not examined)
+ * @pkey: pointer to store the pkey
+ *
+ * Stores 0xFFFF in @pkey when the device runs the RoCE protocol on
+ * port 1 and 0 otherwise. Always returns 0.
+ */
+static int irdma_query_pkey(struct ib_device *ibdev,
+			    u8 port,
+			    u16 index,
+			    u16 *pkey)
+{
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	bool is_roce = rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1);
+
+	*pkey = is_roce ? 0xFFFF : 0;
+
+	return 0;
+}
+
+/**
+ * irdma_modify_qp_roce - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data (not used by this function)
+ *
+ * Copies the attributes selected by @attr_mask into the qp's RoCE and
+ * UDP offload info, rewrites the host context through qp_setctx_roce
+ * and, when IB_QP_STATE requests a transition that needs the hardware,
+ * issues it with irdma_hw_modify_qp().
+ *
+ * Return: 0 on success; -EINVAL for an out-of-range attribute, an
+ * invalid state transition, or a context/hardware modify failure;
+ * -ENOMEM when ws_add fails for the new traffic class.
+ */
+int irdma_modify_qp_roce(struct ib_qp *ibqp,
+			 struct ib_qp_attr *attr,
+			 int attr_mask,
+			 struct ib_udata *udata)
+{
+	struct irdma_pd *iwpd = to_iwpd(ibqp->pd);
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+	struct irdma_device *iwdev = iwqp->iwdev;
+	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+	struct irdma_qp_host_ctx_info *ctx_info;
+	struct irdma_roce_offload_info *roce_info;
+	struct irdma_udp_offload_info *udp_info;
+	struct irdma_modify_qp_info info = {};
+	unsigned long flags;
+	u8 issue_modify_qp = 0;
+	int ret = 0;
+
+	ctx_info = &iwqp->ctx_info;
+	roce_info = &iwqp->roce_info;
+	udp_info = &iwqp->udp_info;
+
+	if (attr_mask & IB_QP_DEST_QPN)
+		roce_info->dest_qp = attr->dest_qp_num;
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		irdma_query_pkey(ibqp->device, 0, attr->pkey_index,
+				 &roce_info->p_key);
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		roce_info->qkey = attr->qkey;
+
+	if (attr_mask & IB_QP_PORT)
+		iwqp->roce_ah.av.attrs.port_num = attr->ah_attr.port_num;
+
+	if (attr_mask & IB_QP_PATH_MTU) {
+		/* Indexed by enum ib_mtu; slot 0 is never a valid lookup. */
+		const u16 path_mtu[] = {-1, 256, 512, 1024, 2048, 4096};
+
+		/* The range checks run first so the table lookup is in bounds. */
+		if (attr->path_mtu < IB_MTU_256 ||
+		    attr->path_mtu > IB_MTU_4096 ||
+		    iwdev->vsi.mtu <= path_mtu[attr->path_mtu]) {
+			irdma_dev_warn(dev, "Invalid MTU %d\n", attr->path_mtu);
+			return -EINVAL;
+		}
+
+		udp_info->snd_mss = path_mtu[attr->path_mtu];
+	}
+
+	if (attr_mask & IB_QP_SQ_PSN) {
+		udp_info->psn_nxt = attr->sq_psn;
+		udp_info->lsn = 0xffff;
+		udp_info->psn_una = attr->sq_psn;
+		udp_info->psn_max = attr->sq_psn;
+	}
+
+	if (attr_mask & IB_QP_RQ_PSN)
+		udp_info->epsn = attr->rq_psn;
+
+	if (attr_mask & IB_QP_RNR_RETRY)
+		udp_info->rnr_nak_thresh = attr->rnr_retry;
+
+	if (attr_mask & IB_QP_RETRY_CNT)
+		udp_info->rexmit_thresh = attr->retry_cnt;
+
+	ctx_info->roce_info->pd_id = iwpd->sc_pd.pd_id;
+
+	if (attr_mask & IB_QP_AV) {
+		struct irdma_av *av = &iwqp->roce_ah.av;
+		const struct ib_gid_attr *sgid_attr;
+		u16 vlan_id = IRDMA_NO_VLAN;
+		u32 local_ip[4];
+
+		memset(&iwqp->roce_ah, 0, sizeof(iwqp->roce_ah));
+		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
+			udp_info->ttl = attr->ah_attr.grh.hop_limit;
+			udp_info->flow_label = attr->ah_attr.grh.flow_label;
+			/* Move the qp from its old user priority to the new one. */
+			dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri);
+			ctx_info->user_pri = attr->ah_attr.grh.traffic_class;
+			if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri))
+				return -ENOMEM;
+			irdma_qp_add_qos(&iwqp->sc_qp);
+		}
+		sgid_attr = attr->ah_attr.grh.sgid_attr;
+		if (sgid_attr->ndev) {
+			vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
+			ether_addr_copy(ctx_info->roce_info->mac_addr,
+					sgid_attr->ndev->dev_addr);
+		}
+		udp_info->vlan_tag = vlan_id;
+		if (udp_info->vlan_tag != IRDMA_NO_VLAN)
+			udp_info->insert_vlan_tag = true;
+		else
+			udp_info->insert_vlan_tag = false;
+		av->attrs = attr->ah_attr;
+		av->attrs.port_num = attr->ah_attr.port_num;
+		rdma_gid2ip(&av->sgid_addr.saddr, &sgid_attr->gid);
+		rdma_gid2ip(&av->dgid_addr.saddr, &attr->ah_attr.grh.dgid);
+		roce_info->local_qp = ibqp->qp_num;
+		if (av->sgid_addr.saddr.sa_family == AF_INET6) {
+			__be32 *daddr =
+				av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
+			__be32 *saddr =
+				av->sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
+
+			irdma_copy_ip_ntohl(&udp_info->dest_ip_addr0, daddr);
+			irdma_copy_ip_ntohl(&udp_info->local_ipaddr0, saddr);
+
+			udp_info->ipv4 = false;
+			irdma_copy_ip_ntohl(local_ip, daddr);
+
+			udp_info->arp_idx = irdma_arp_table(iwdev->rf,
+							    &local_ip[0],
+							    false, NULL,
+							    IRDMA_ARP_RESOLVE);
+		} else {
+			__be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr;
+			__be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr;
+
+			local_ip[0] = ntohl(daddr);
+
+			udp_info->ipv4 = true;
+			udp_info->dest_ip_addr0 = 0;
+			udp_info->dest_ip_addr1 = 0;
+			udp_info->dest_ip_addr2 = 0;
+			udp_info->dest_ip_addr3 = local_ip[0];
+
+			udp_info->local_ipaddr0 = 0;
+			udp_info->local_ipaddr1 = 0;
+			udp_info->local_ipaddr2 = 0;
+			udp_info->local_ipaddr3 = ntohl(saddr);
+		}
+		/*
+		 * NOTE(review): for IPv6 this overwrites the arp_idx that
+		 * irdma_arp_table() returned just above - confirm intended.
+		 */
+		udp_info->arp_idx =
+			irdma_add_arp(iwdev->rf, local_ip, udp_info->ipv4,
+				      attr->ah_attr.roce.dmac);
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		if (attr->max_rd_atomic > dev->hw_attrs.max_hw_ord) {
+			irdma_dev_err(dev,
+				      "rd_atomic = %d, above max_hw_ord=%d\n",
+				      attr->max_rd_atomic,
+				      dev->hw_attrs.max_hw_ord);
+			return -EINVAL;
+		}
+		if (attr->max_rd_atomic)
+			roce_info->ord_size = attr->max_rd_atomic;
+		info.ord_valid = true;
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+		if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) {
+			/* Report the value that was actually range-checked. */
+			irdma_dev_err(dev,
+				      "rd_atomic = %d, above max_hw_ird=%d\n",
+				      attr->max_dest_rd_atomic,
+				      dev->hw_attrs.max_hw_ird);
+			return -EINVAL;
+		}
+		if (attr->max_dest_rd_atomic)
+			roce_info->ird_size = irdma_derive_hw_ird_setting(attr->max_dest_rd_atomic);
+	}
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+			roce_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+			roce_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+			roce_info->rd_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
+			roce_info->bind_en = true;
+
+		if (iwqp->user_mode) {
+			roce_info->rd_en = true;
+			roce_info->wr_rdresp_en = true;
+			roce_info->priv_mode_en = false;
+		}
+	}
+
+	/*
+	 * From here until the unlock below every failure must leave through
+	 * the exit label so that iwqp->lock is released and IRQs restored.
+	 */
+	spin_lock_irqsave(&iwqp->lock, flags);
+	if (attr_mask & IB_QP_STATE) {
+		if (!ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state,
+					iwqp->ibqp.qp_type, attr_mask)) {
+			irdma_dev_warn(dev, "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n",
+				       iwqp->ibqp.qp_num, iwqp->ibqp_state, attr->qp_state);
+			ret = -EINVAL;
+			goto exit;
+		}
+		if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
+			ret = -EINVAL;
+			goto exit;
+		}
+		info.curr_iwarp_state = iwqp->iwarp_state;
+
+		switch (attr->qp_state) {
+		case IB_QPS_INIT:
+			if (iwqp->iwarp_state > (u32)IRDMA_QP_STATE_IDLE) {
+				ret = -EINVAL;
+				goto exit;
+			}
+
+			if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
+				info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
+				issue_modify_qp = 1;
+			}
+			break;
+		case IB_QPS_RTR:
+			if (iwqp->iwarp_state > (u32)IRDMA_QP_STATE_IDLE) {
+				ret = -EINVAL;
+				goto exit;
+			}
+			info.arp_cache_idx_valid = true;
+			info.cq_num_valid = true;
+			info.next_iwarp_state = IRDMA_QP_STATE_RTR;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_RTS:
+			if (iwqp->ibqp_state < IB_QPS_RTR ||
+			    iwqp->ibqp_state == IB_QPS_ERR) {
+				ret = -EINVAL;
+				goto exit;
+			}
+
+			info.arp_cache_idx_valid = true;
+			info.cq_num_valid = true;
+			info.next_iwarp_state = IRDMA_QP_STATE_RTS;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_SQD:
+			/* These two early exits leave with ret == 0. */
+			if (iwqp->hw_iwarp_state > (u32)IRDMA_QP_STATE_RTS)
+				goto exit;
+
+			if (iwqp->iwarp_state == (u32)IRDMA_QP_STATE_CLOSING ||
+			    iwqp->iwarp_state < (u32)IRDMA_QP_STATE_RTS)
+				goto exit;
+
+			if (iwqp->iwarp_state > (u32)IRDMA_QP_STATE_CLOSING) {
+				ret = -EINVAL;
+				goto exit;
+			}
+
+			info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+			issue_modify_qp = 1;
+			break;
+
+		case IB_QPS_SQE:
+		case IB_QPS_ERR:
+		case IB_QPS_RESET:
+			if (iwqp->ibqp_state == IB_QPS_SQD)
+				break;
+
+			if (iwqp->iwarp_state == (u32)IRDMA_QP_STATE_ERROR) {
+				ret = -EINVAL;
+				goto exit;
+			}
+
+			info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+			issue_modify_qp = 1;
+			break;
+		default:
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		iwqp->ibqp_state = attr->qp_state;
+	}
+
+	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+	ret = dev->iw_priv_qp_ops->qp_setctx_roce(&iwqp->sc_qp,
+						  iwqp->host_ctx.va,
+						  ctx_info);
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	if (ret) {
+		irdma_debug(dev, IRDMA_DEBUG_ERR, "setctx_roce\n");
+		return -EINVAL;
+	}
+
+	if (attr_mask & IB_QP_STATE) {
+		if (issue_modify_qp) {
+			/* The hardware modify is issued without holding the lock. */
+			if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
+				return -EINVAL;
+			spin_lock_irqsave(&iwqp->lock, flags);
+			/* Commit only if the state did not change meanwhile. */
+			if (iwqp->iwarp_state == info.curr_iwarp_state) {
+				iwqp->iwarp_state = info.next_iwarp_state;
+				iwqp->ibqp_state = attr->qp_state;
+			}
+			if (iwqp->ibqp_state > IB_QPS_RTS && !iwqp->flush_issued) {
+				iwqp->flush_issued = 1;
+				spin_unlock_irqrestore(&iwqp->lock, flags);
+				irdma_flush_wqes(iwdev->rf, iwqp);
+			} else {
+				spin_unlock_irqrestore(&iwqp->lock, flags);
+			}
+		} else {
+			iwqp->ibqp_state = attr->qp_state;
+		}
+	}
+
+	return 0;
+exit:
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	return ret;
+}
+
+/**
+ * irdma_modify_qp - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data (not used by this function)
+ *
+ * Handles IB_QP_STATE and IB_QP_ACCESS_FLAGS. A state change is
+ * validated against the tracked iwarp state under iwqp->lock and, when
+ * the hardware has to be told, issued through irdma_hw_modify_qp()
+ * after the lock is dropped. Moving past RTS either disconnects the
+ * connection right away or schedules the close timer.
+ *
+ * Return: 0 on success (including SQD requests that need no work),
+ * -EINVAL for an invalid transition or a context/hardware failure.
+ */
+int irdma_modify_qp(struct ib_qp *ibqp,
+		    struct ib_qp_attr *attr,
+		    int attr_mask,
+		    struct ib_udata *udata)
+{
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+	struct irdma_device *iwdev = iwqp->iwdev;
+	struct irdma_qp_host_ctx_info *ctx_info;
+	struct irdma_iwarp_offload_info *offload_info;
+	struct irdma_modify_qp_info info = {};
+	u8 issue_modify_qp = 0;
+	u8 dont_wait = 0;
+	/* Signed: carries negative errno values back to the caller. */
+	int err;
+	unsigned long flags;
+
+	ctx_info = &iwqp->ctx_info;
+	offload_info = &iwqp->iwarp_info;
+
+	/* Until the unlock below, failures leave through the exit label. */
+	spin_lock_irqsave(&iwqp->lock, flags);
+	if (attr_mask & IB_QP_STATE) {
+		if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
+			err = -EINVAL;
+			goto exit;
+		}
+
+		info.curr_iwarp_state = iwqp->iwarp_state;
+		switch (attr->qp_state) {
+		case IB_QPS_INIT:
+		case IB_QPS_RTR:
+			if (iwqp->iwarp_state > (u32)IRDMA_QP_STATE_IDLE) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
+				info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
+				issue_modify_qp = 1;
+			}
+			break;
+		case IB_QPS_RTS:
+			if (iwqp->iwarp_state > (u32)IRDMA_QP_STATE_RTS ||
+			    !iwqp->cm_id) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			issue_modify_qp = 1;
+			iwqp->hw_tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+			iwqp->hte_added = 1;
+			info.next_iwarp_state = IRDMA_QP_STATE_RTS;
+			info.tcp_ctx_valid = true;
+			info.ord_valid = true;
+			info.arp_cache_idx_valid = true;
+			info.cq_num_valid = true;
+			break;
+		case IB_QPS_SQD:
+			if (iwqp->hw_iwarp_state > (u32)IRDMA_QP_STATE_RTS) {
+				err = 0;
+				goto exit;
+			}
+
+			if (iwqp->iwarp_state == (u32)IRDMA_QP_STATE_CLOSING ||
+			    iwqp->iwarp_state < (u32)IRDMA_QP_STATE_RTS) {
+				err = 0;
+				goto exit;
+			}
+
+			if (iwqp->iwarp_state > (u32)IRDMA_QP_STATE_CLOSING) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			info.next_iwarp_state = IRDMA_QP_STATE_CLOSING;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_SQE:
+			if (iwqp->iwarp_state >= (u32)IRDMA_QP_STATE_TERMINATE) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			info.next_iwarp_state = IRDMA_QP_STATE_TERMINATE;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_ERR:
+		case IB_QPS_RESET:
+			if (iwqp->iwarp_state == (u32)IRDMA_QP_STATE_ERROR) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			if (iwqp->sc_qp.term_flags)
+				irdma_terminate_del_timer(&iwqp->sc_qp);
+			info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+			/*
+			 * Ask for a TCP reset only while the connection is
+			 * still open; otherwise tear down without waiting.
+			 */
+			if (iwqp->hw_tcp_state > IRDMA_TCP_STATE_CLOSED &&
+			    iwdev->iw_status &&
+			    iwqp->hw_tcp_state != IRDMA_TCP_STATE_TIME_WAIT)
+				info.reset_tcp_conn = true;
+			else
+				dont_wait = 1;
+
+			issue_modify_qp = 1;
+			info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+			break;
+		default:
+			err = -EINVAL;
+			goto exit;
+		}
+
+		iwqp->ibqp_state = attr->qp_state;
+	}
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+		ctx_info->iwarp_info_valid = true;
+		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+			offload_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+			offload_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+			offload_info->rd_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
+			offload_info->bind_en = true;
+
+		if (iwqp->user_mode) {
+			offload_info->rd_en = true;
+			offload_info->wr_rdresp_en = true;
+			offload_info->priv_mode_en = false;
+		}
+	}
+
+	if (ctx_info->iwarp_info_valid) {
+		struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+		int ret;
+
+		ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+		ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+		ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+						     iwqp->host_ctx.va,
+						     ctx_info);
+		if (ret) {
+			irdma_debug(dev, IRDMA_DEBUG_ERR, "setting QP context\n");
+			err = -EINVAL;
+			goto exit;
+		}
+	}
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	if (attr_mask & IB_QP_STATE) {
+		if (issue_modify_qp) {
+			if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
+				return -EINVAL;
+
+			/*
+			 * Commit the new state only after a hardware modify
+			 * was actually issued: info.next_iwarp_state is
+			 * zero-initialised otherwise and would clobber the
+			 * tracked state. The commit is skipped if the state
+			 * changed while the lock was dropped.
+			 */
+			spin_lock_irqsave(&iwqp->lock, flags);
+			if (iwqp->iwarp_state == info.curr_iwarp_state) {
+				iwqp->iwarp_state = info.next_iwarp_state;
+				iwqp->ibqp_state = attr->qp_state;
+			}
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+		}
+	}
+
+	if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
+		if (dont_wait) {
+			if (iwqp->cm_id && iwqp->hw_tcp_state) {
+				spin_lock_irqsave(&iwqp->lock, flags);
+				iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
+				iwqp->last_aeq = IRDMA_AE_RESET_SENT;
+				spin_unlock_irqrestore(&iwqp->lock, flags);
+				irdma_cm_disconn(iwqp);
+			}
+		} else {
+			spin_lock_irqsave(&iwqp->lock, flags);
+			if (iwqp->cm_id) {
+				/* Only the first caller arms the close timer. */
+				if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+					iwqp->cm_id->add_ref(iwqp->cm_id);
+					irdma_schedule_cm_timer(iwqp->cm_node,
+								(struct irdma_puda_buf *)iwqp,
+								IRDMA_TIMER_TYPE_CLOSE,
+								1,
+								0);
+				}
+			}
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+		}
+	}
+
+	return 0;
+exit:
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	return err;
+}
+
+/**
+ * cq_free_rsrc - free up resources for cq
+ * @rf: RDMA PCI function
+ * @iwcq: cq ptr
+ *
+ * Frees the cq's DMA memory unless the cq is a user-mode one, then
+ * returns the cq id to the allocated_cqs resource map.
+ */
+static void cq_free_rsrc(struct irdma_pci_f *rf, struct irdma_cq *iwcq)
+{
+	bool kernel_cq = !iwcq->user_mode;
+
+	if (kernel_cq)
+		irdma_free_dma_mem(rf->sc_dev.hw, &iwcq->kmem);
+
+	irdma_free_rsrc(rf, rf->allocated_cqs, iwcq->sc_cq.cq_uk.cq_id);
+}
+
+/**
+ * irdma_destroy_cq - destroy cq
+ * @ib_cq: cq pointer
+ *
+ * Destroys the hardware cq, releases its resources, frees the cq
+ * structure and drops one device use count. A NULL @ib_cq is only
+ * logged. Always returns 0.
+ */
+static int irdma_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct irdma_device *iwdev;
+	struct irdma_cq *iwcq;
+
+	if (!ib_cq) {
+		irdma_pr_err("ib_cq == NULL\n");
+		return 0;
+	}
+
+	iwdev = to_iwdev(ib_cq->device);
+	iwcq = to_iwcq(ib_cq);
+
+	irdma_cq_wq_destroy(iwdev->rf, &iwcq->sc_cq);
+	cq_free_rsrc(iwdev->rf, iwcq);
+	kfree(iwcq);
+	irdma_rem_devusecount(iwdev);
+
+	return 0;
+}
+
+/**
+ * irdma_create_cq - create cq
+ * @ibdev: device pointer from stack
+ * @attr: attributes for cq
+ * @context: user context created during alloc
+ * @udata: user data
+ *
+ * Allocates a cq id, sets up the cq memory (a registered user buffer
+ * when @context is set, a driver DMA allocation otherwise), initialises
+ * the cq and asks the control qp to create it.
+ *
+ * Return: the new cq on success, or an ERR_PTR holding -ENODEV when the
+ * device is closing, -EINVAL when @attr->cqe exceeds the device limit,
+ * or -ENOMEM/-EFAULT/-EPROTO from the individual setup steps.
+ */
+static struct ib_cq *irdma_create_cq(struct ib_device *ibdev,
+				     const struct ib_cq_init_attr *attr,
+				     struct ib_ucontext *context,
+				     struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_sc_dev *dev = &rf->sc_dev;
+	struct irdma_cq_init_info info = {};
+	struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *cqp_info;
+	enum irdma_status_code status;
+	struct irdma_pbl *iwpbl;
+	struct irdma_cq *iwcq;
+	struct irdma_sc_cq *cq;
+	unsigned long flags;
+	int entries = attr->cqe;
+	u32 cq_num = 0;
+	int err;
+
+	if (iwdev->closing)
+		return ERR_PTR(-ENODEV);
+
+	if (entries > rf->max_cqe)
+		return ERR_PTR(-EINVAL);
+
+	iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
+	if (!iwcq)
+		return ERR_PTR(-ENOMEM);
+
+	err = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num,
+			       &rf->next_cq);
+	if (err)
+		goto free_iwcq;
+
+	cq = &iwcq->sc_cq;
+	cq->back_cq = (void *)iwcq;
+	spin_lock_init(&iwcq->lock);
+
+	/* Never create a cq with fewer than four entries. */
+	ukinfo->cq_size = max(entries, 4);
+	ukinfo->cq_id = cq_num;
+	iwcq->ibcq.cqe = ukinfo->cq_size;
+
+	info.dev = dev;
+	info.vsi = &iwdev->vsi;
+	info.type = IRDMA_CQ_TYPE_IWARP;
+	info.ceqe_mask = 1;
+	info.ceq_id_valid = true;
+	/* An out-of-range completion vector leaves ceq_id at zero. */
+	if (attr->comp_vector < rf->ceqs_count)
+		info.ceq_id = attr->comp_vector;
+
+	if (context) {
+		struct irdma_ucontext *ucontext = to_ucontext(context);
+		struct irdma_create_cq_req req = {};
+		struct irdma_cq_mr *cqmr;
+
+		iwcq->user_mode = true;
+		if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+			err = -EFAULT;
+			goto free_rsrc;
+		}
+
+		/* The user buffer must have been registered beforehand. */
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf,
+				      &ucontext->cq_reg_mem_list);
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		if (!iwpbl) {
+			err = -EPROTO;
+			goto free_rsrc;
+		}
+
+		iwcq->iwpbl = iwpbl;
+		iwcq->cq_mem_size = 0;
+		cqmr = &iwpbl->cq_mr;
+		info.shadow_area_pa = cqmr->shadow;
+		if (!iwpbl->pbl_allocated) {
+			info.cq_base_pa = cqmr->cq_pbl.addr;
+		} else {
+			info.virtual_map = true;
+			info.pbl_chunk_size = 1;
+			info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
+		}
+	} else {
+		/* Kmode allocations: cq ring followed by the shadow area */
+		int shadow = IRDMA_SHADOW_AREA_SIZE << 3;
+		int rsize;
+
+		rsize = ukinfo->cq_size * sizeof(struct irdma_cqe);
+		rsize = round_up(rsize, 256);
+		status = irdma_allocate_dma_mem(dev->hw, &iwcq->kmem,
+						rsize + shadow, 256);
+		if (status) {
+			err = -ENOMEM;
+			goto free_rsrc;
+		}
+
+		ukinfo->cq_base = iwcq->kmem.va;
+		ukinfo->shadow_area = iwcq->kmem.va + rsize;
+		info.cq_base_pa = iwcq->kmem.pa;
+		info.shadow_area_pa = info.cq_base_pa + rsize;
+	}
+
+	if (dev->iw_priv_cq_ops->cq_init(cq, &info)) {
+		irdma_debug(dev, IRDMA_DEBUG_ERR, "init cq fail\n");
+		err = -EPROTO;
+		goto free_rsrc;
+	}
+
+	cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request) {
+		err = -ENOMEM;
+		goto free_rsrc;
+	}
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.cq_create.cq = cq;
+	cqp_info->in.u.cq_create.check_overflow = true;
+	cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+	status = irdma_handle_cqp_op(rf, cqp_request);
+	if (status) {
+		irdma_debug(dev, IRDMA_DEBUG_ERR, "CQP-OP Create CQ fail");
+		err = -ENOMEM;
+		goto free_rsrc;
+	}
+
+	if (context) {
+		struct irdma_create_cq_resp resp = {};
+
+		resp.cq_id = ukinfo->cq_id;
+		resp.cq_size = ukinfo->cq_size;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			irdma_debug(dev, IRDMA_DEBUG_ERR, "copy to user data\n");
+			err = -EPROTO;
+			goto destroy_cq;
+		}
+	}
+
+	irdma_add_devusecount(iwdev);
+
+	return (struct ib_cq *)iwcq;
+
+destroy_cq:
+	irdma_cq_wq_destroy(rf, cq);
+free_rsrc:
+	/*
+	 * NOTE(review): cq_free_rsrc() releases the id stored in
+	 * iwcq->sc_cq.cq_uk.cq_id - confirm that field already holds cq_num
+	 * on the paths that get here before cq_init has run.
+	 */
+	cq_free_rsrc(rf, iwcq);
+free_iwcq:
+	kfree(iwcq);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * irdma_get_user_access - get hw access from IB access
+ * @acc: IB access flags to translate
+ *
+ * Maps the local-write, remote-write, remote-read and MW-bind IB access
+ * bits to the matching IRDMA_ACCESS_FLAGS_* bits; every other bit in
+ * @acc is ignored.
+ */
+static inline u16 irdma_get_user_access(int acc)
+{
+	u16 access = 0;
+
+	if (acc & IB_ACCESS_LOCAL_WRITE)
+		access |= IRDMA_ACCESS_FLAGS_LOCALWRITE;
+	if (acc & IB_ACCESS_REMOTE_WRITE)
+		access |= IRDMA_ACCESS_FLAGS_REMOTEWRITE;
+	if (acc & IB_ACCESS_REMOTE_READ)
+		access |= IRDMA_ACCESS_FLAGS_REMOTEREAD;
+	if (acc & IB_ACCESS_MW_BIND)
+		access |= IRDMA_ACCESS_FLAGS_BIND_WINDOW;
+
+	return access;
+}
+
+/**
+ * irdma_free_stag - free stag resource
+ * @iwdev: iwarp device
+ * @stag: stag to free
+ *
+ * Extracts the stag index from @stag, returns it to the allocated_mrs
+ * resource map and drops one device use count.
+ */
+static void irdma_free_stag(struct irdma_device *iwdev, u32 stag)
+{
+	struct irdma_pci_f *rf = iwdev->rf;
+	u32 stag_idx = (stag & rf->mr_stagmask) >> IRDMA_CQPSQ_STAG_IDX_S;
+
+	irdma_free_rsrc(rf, rf->allocated_mrs, stag_idx);
+	irdma_rem_devusecount(iwdev);
+}
+
+/**
+ * irdma_create_stag - create random stag
+ * @iwdev: iwarp device
+ *
+ * Uses one random 32-bit value for the consumer key (low byte), the
+ * driver key (bits outside mr_stagmask) and the index at which the
+ * search for a free stag index starts. Takes one device use count on
+ * success.
+ *
+ * Return: the new stag, or 0 when no stag index could be allocated.
+ */
+static u32 irdma_create_stag(struct irdma_device *iwdev)
+{
+	struct irdma_pci_f *rf = iwdev->rf;
+	u32 stag_index = 0;
+	u32 next_stag_index;
+	u32 driver_key;
+	u32 random;
+	u8 consumer_key;
+
+	get_random_bytes(&random, sizeof(random));
+	consumer_key = (u8)random;
+	driver_key = random & ~rf->mr_stagmask;
+
+	next_stag_index = (random & rf->mr_stagmask) >> 8;
+	next_stag_index %= rf->max_mr;
+
+	if (irdma_alloc_rsrc(rf, rf->allocated_mrs, rf->max_mr,
+			     &stag_index, &next_stag_index))
+		return 0;
+
+	irdma_add_devusecount(iwdev);
+
+	return ((stag_index << IRDMA_CQPSQ_STAG_IDX_S) | driver_key) +
+	       (u32)consumer_key;
+}
+
+/**
+ * irdma_next_pbl_addr - Get next pbl address
+ * @pbl: pointer to a pble
+ * @pinfo: info pointer; advanced to the next entry when the current
+ *	   one has been filled
+ * @idx: index within the current entry; incremented, and reset to 0
+ *	 when the walk moves on to the next entry
+ *
+ * Return: the slot after @pbl, or the start address of the next
+ * @pinfo entry once @idx reaches the current entry's count.
+ */
+static inline u64 *irdma_next_pbl_addr(u64 *pbl,
+				       struct irdma_pble_info **pinfo,
+				       u32 *idx)
+{
+	struct irdma_pble_info *cur = *pinfo;
+
+	*idx += 1;
+	if (cur && *idx == cur->cnt) {
+		/* Current entry is full: continue in the following one. */
+		*idx = 0;
+		*pinfo = cur + 1;
+
+		return (u64 *)(*pinfo)->addr;
+	}
+
+	return pbl + 1;
+}
+
+/**
+ * irdma_copy_user_pgaddrs - copy user page addresses into the pbles
+ * @iwmr: iwmr for IB's user page addresses
+ * @pbl: pble pointer to save 1 level or 0 level pble
+ * @level: indicated level 0, 1 or 2
+ *
+ * Walks the DMA-mapped scatterlist of the region and stores one address
+ * per page. The first address is masked with the page mask; afterwards
+ * only addresses that are already page aligned are stored.
+ */
+static void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr,
+				    u64 *pbl,
+				    enum irdma_pble_level level)
+{
+	struct ib_umem *region = iwmr->region;
+	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct irdma_pble_info *pinfo;
+	struct scatterlist *sg;
+	int chunk_pages, entry, i;
+	u64 pg_addr = 0;
+	u32 idx = 0;
+
+	/* No leaf bookkeeping is used for a level 1 pble. */
+	if (level == PBLE_LEVEL_1)
+		pinfo = NULL;
+	else
+		pinfo = palloc->level2.leaf;
+
+	for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+		chunk_pages = DIV_ROUND_UP(sg_dma_len(sg), iwmr->page_size);
+		/* For qp memory, remember the page of the first sg entry. */
+		if (iwmr->type == IW_MEMREG_TYPE_QP &&
+		    !iwpbl->qp_mr.sq_page)
+			iwpbl->qp_mr.sq_page = sg_page(sg);
+		for (i = 0; i < chunk_pages; i++) {
+			pg_addr = sg_dma_address(sg) + (i * iwmr->page_size);
+			/* Past the first address, skip unaligned ones. */
+			if ((entry + i) != 0 && (pg_addr & ~iwmr->page_msk))
+				continue;
+
+			if ((entry + i) == 0)
+				*pbl = pg_addr & iwmr->page_msk;
+			else
+				*pbl = pg_addr;
+			pbl = irdma_next_pbl_addr(pbl, &pinfo, &idx);
+		}
+	}
+}
+
+/**
+ * irdma_check_mem_contiguous - check if pbls stored in arr are contiguous
+ * @arr: lvl1 pbl array
+ * @npages: page count
+ * @pg_size: page size
+ *
+ * Return: true when every entry equals the first entry plus its index
+ * times @pg_size (trivially true for @npages == 0), false otherwise.
+ */
+static bool irdma_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
+{
+	u32 pg_idx;
+
+	for (pg_idx = 0; pg_idx < npages; pg_idx++) {
+		/*
+		 * Widen before multiplying: a 32-bit pg_size * pg_idx wraps
+		 * once the offset reaches 4GB and would compare against the
+		 * wrong expected address.
+		 */
+		if ((*arr + ((u64)pg_size * pg_idx)) != arr[pg_idx])
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * irdma_check_mr_contiguous - check if MR is physically contiguous
+ * @palloc: pbl allocation struct
+ * @pg_size: page size
+ *
+ * For a level 1 allocation the single pbl array is checked. Otherwise
+ * each leaf must start PBLE_PER_PAGE pages after the previous one and
+ * be contiguous within itself.
+ *
+ * Return: true when the addresses form one contiguous range.
+ */
+static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
+				      u32 pg_size)
+{
+	struct irdma_pble_level2 *lvl2 = &palloc->level2;
+	struct irdma_pble_info *leaf = lvl2->leaf;
+	u64 *arr = NULL;
+	u64 *start_addr = NULL;
+	int i;
+	bool ret;
+
+	if (palloc->level == PBLE_LEVEL_1) {
+		arr = (u64 *)palloc->level1.addr;
+		ret = irdma_check_mem_contiguous(arr, palloc->total_cnt,
+						 pg_size);
+		return ret;
+	}
+
+	start_addr = (u64 *)leaf->addr;
+
+	for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+		arr = (u64 *)leaf->addr;
+		/*
+		 * The expected leaf offset is computed in 64 bits; a 32-bit
+		 * product wraps once the offset reaches 4GB.
+		 */
+		if ((*start_addr + ((u64)i * pg_size * PBLE_PER_PAGE)) != *arr)
+			return false;
+		ret = irdma_check_mem_contiguous(arr, leaf->cnt, pg_size);
+		if (!ret)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * irdma_setup_pbles - copy user pg address to pble's
+ * @rf: RDMA PCI function
+ * @iwmr: mr pointer for this memory registration
+ * @use_pbles: flag if to use pble's
+ *
+ * Without pbles the page addresses are written straight into the mr's
+ * pgaddrmem. With pbles a pble allocation is obtained first, the
+ * addresses are written into it, and its first entry is mirrored into
+ * pgaddrmem[0].
+ *
+ * Return: 0 on success, -ENOMEM when the pble allocation fails.
+ */
+static int irdma_setup_pbles(struct irdma_pci_f *rf,
+			     struct irdma_mr *iwmr,
+			     bool use_pbles)
+{
+	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct irdma_pble_info *pinfo;
+	enum irdma_pble_level level;
+	u64 *pbl;
+
+	if (!use_pbles) {
+		irdma_copy_user_pgaddrs(iwmr, iwmr->pgaddrmem, PBLE_LEVEL_1);
+		return 0;
+	}
+
+	if (irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt, false))
+		return -ENOMEM;
+
+	iwpbl->pbl_allocated = true;
+	level = palloc->level;
+	if (level == PBLE_LEVEL_1)
+		pinfo = &palloc->level1;
+	else
+		pinfo = palloc->level2.leaf;
+	pbl = (u64 *)pinfo->addr;
+
+	irdma_copy_user_pgaddrs(iwmr, pbl, level);
+	iwmr->pgaddrmem[0] = *pbl;
+
+	return 0;
+}
+
+/**
+ * irdma_handle_q_mem - handle memory for qp and cq
+ * @iwdev: iwarp device
+ * @req: information for q memory management
+ * @iwpbl: pble struct
+ * @use_pbles: flag to use pble
+ *
+ * Fills the page addresses, then records for each queue either its
+ * base address (pbles not used, or the pages turned out contiguous) or
+ * its pble index. The shadow area address is taken from the slot that
+ * follows the queue pages. A pble allocation that is not needed in the
+ * end is released again.
+ *
+ * Return: 0 on success, the irdma_setup_pbles() error, or -ENOMEM when
+ * the pble allocation is not a level 1 one.
+ */
+static int irdma_handle_q_mem(struct irdma_device *iwdev,
+			      struct irdma_mem_reg_req *req,
+			      struct irdma_pbl *iwpbl,
+			      bool use_pbles)
+{
+	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct irdma_mr *iwmr = iwpbl->iwmr;
+	struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
+	struct irdma_cq_mr *cqmr = &iwpbl->cq_mr;
+	u32 pg_size = iwmr->page_size;
+	int total = req->sq_pages + req->rq_pages + req->cq_pages;
+	bool contiguous = true;
+	u64 *arr;
+	int err;
+
+	err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
+	if (err)
+		return err;
+
+	if (!use_pbles) {
+		arr = iwmr->pgaddrmem;
+	} else if (palloc->level == PBLE_LEVEL_1) {
+		arr = (u64 *)palloc->level1.addr;
+	} else {
+		/* Only a level 1 pble is accepted for queue memory. */
+		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+		iwpbl->pbl_allocated = false;
+		return -ENOMEM;
+	}
+
+	if (iwmr->type == IW_MEMREG_TYPE_QP) {
+		qpmr->shadow = (dma_addr_t)arr[total];
+
+		/* Both the sq and the rq range must be contiguous. */
+		if (use_pbles)
+			contiguous =
+				irdma_check_mem_contiguous(arr,
+							   req->sq_pages,
+							   pg_size) &&
+				irdma_check_mem_contiguous(&arr[req->sq_pages],
+							   req->rq_pages,
+							   pg_size);
+
+		if (contiguous) {
+			qpmr->sq_pbl.addr = arr[0];
+			qpmr->rq_pbl.addr = arr[req->sq_pages];
+		} else {
+			qpmr->sq_pbl.idx = palloc->level1.idx;
+			qpmr->rq_pbl.idx = palloc->level1.idx + req->sq_pages;
+		}
+	} else { /* CQ */
+		cqmr->shadow = (dma_addr_t)arr[total];
+
+		if (use_pbles)
+			contiguous = irdma_check_mem_contiguous(arr,
+								req->cq_pages,
+								pg_size);
+
+		if (contiguous)
+			cqmr->cq_pbl.addr = arr[0];
+		else
+			cqmr->cq_pbl.idx = palloc->level1.idx;
+	}
+
+	/* Contiguous memory is addressed directly; drop the pble. */
+	if (use_pbles && contiguous) {
+		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+		iwpbl->pbl_allocated = false;
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_hw_alloc_mw - create the hw memory window
+ * @rf: RDMA PCI function
+ * @iwmr: pointer to memory window info
+ *
+ * Builds an IRDMA_OP_MW_ALLOC control qp request from the window's
+ * stag, type and protection domain and hands it to
+ * irdma_handle_cqp_op().
+ *
+ * Return: 0 on success, -ENOMEM when no cqp request is available or the
+ * operation fails.
+ */
+static int irdma_hw_alloc_mw(struct irdma_pci_f *rf,
+			     struct irdma_mr *iwmr)
+{
+	struct irdma_mw_alloc_info *info;
+	struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+	struct irdma_cqp_request *cqp_request;
+	struct cqp_cmds_info *cqp_info;
+
+	cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.mw_alloc.info;
+	memset(info, 0, sizeof(*info));
+	if (iwmr->ibmw.type == IB_MW_TYPE_1)
+		info->mw_wide = true;
+
+	info->page_size = PAGE_SIZE;
+	info->mw_stag_index = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+	info->pd_id = iwpd->sc_pd.pd_id;
+	info->remote_access = true;
+	cqp_info->cqp_cmd = IRDMA_OP_MW_ALLOC;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.mw_alloc.dev = &rf->sc_dev;
+	cqp_info->in.u.mw_alloc.scratch = (uintptr_t)cqp_request;
+	if (irdma_handle_cqp_op(rf, cqp_request)) {
+		/* Name the operation that actually failed. */
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "CQP-OP MW alloc fail\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_alloc_mw - allocate a memory window
+ * @pd: Protection domain
+ * @type: Window type
+ * @udata: user data pointer (not used by this function)
+ *
+ * Allocates the window structure and a stag, creates the window in
+ * hardware and takes one use count on @pd.
+ *
+ * Return: the new memory window, or an ERR_PTR holding -ENOMEM or the
+ * irdma_hw_alloc_mw() error.
+ */
+static struct ib_mw *irdma_alloc_mw(struct ib_pd *pd,
+				    enum ib_mw_type type,
+				    struct ib_udata *udata)
+{
+	struct irdma_device *iwdev = to_iwdev(pd->device);
+	struct irdma_pd *iwpd = to_iwpd(pd);
+	struct irdma_mr *iwmr;
+	u32 stag;
+	int ret;
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr)
+		return ERR_PTR(-ENOMEM);
+
+	/* irdma_create_stag() reports failure by returning 0. */
+	stag = irdma_create_stag(iwdev);
+	if (!stag) {
+		ret = -ENOMEM;
+		goto free_mr;
+	}
+
+	iwmr->type = IW_MEMREG_TYPE_MW;
+	iwmr->stag = stag;
+	iwmr->ibmw.device = pd->device;
+	iwmr->ibmw.pd = pd;
+	iwmr->ibmw.type = type;
+	iwmr->ibmw.rkey = stag;
+
+	ret = irdma_hw_alloc_mw(iwdev->rf, iwmr);
+	if (ret)
+		goto free_stag;
+
+	irdma_add_pdusecount(iwpd);
+
+	return &iwmr->ibmw;
+
+free_stag:
+	irdma_free_stag(iwdev, stag);
+free_mr:
+	kfree(iwmr);
+
+	return ERR_PTR(ret);
+}
+
+/**
+ * irdma_dealloc_mw - deallocate a memory window
+ * @ibmw: memory window to free
+ *
+ * Issues an IRDMA_OP_DEALLOC_STAG request for the window. A failure of
+ * that request is only logged: the pd use count, the stag and the
+ * window structure are released either way.
+ *
+ * Return: 0, or -ENOMEM when no cqp request could be obtained (nothing
+ * is released in that case).
+ */
+static int irdma_dealloc_mw(struct ib_mw *ibmw)
+{
+	struct irdma_device *iwdev = to_iwdev(ibmw->device);
+	struct irdma_pd *iwpd = to_iwpd(ibmw->pd);
+	struct irdma_mr *iwmr = to_iwmr((struct ib_mr *)ibmw);
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_cqp_request *cqp_request;
+	struct irdma_dealloc_stag_info *info;
+	struct cqp_cmds_info *cqp_info;
+
+	cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.dealloc_stag.info;
+	memset(info, 0, sizeof(*info));
+	info->mr = false;
+	info->stag_idx = RS_64_1(ibmw->rkey, IRDMA_CQPSQ_STAG_IDX_S);
+	info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff;
+
+	cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.dealloc_stag.dev = &rf->sc_dev;
+	cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+
+	if (irdma_handle_cqp_op(rf, cqp_request))
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "CQP-OP dealloc MW failed for stag_idx = 0x%x\n",
+			    info->stag_idx);
+
+	irdma_rem_pdusecount(iwpd, iwdev);
+	irdma_free_stag(iwdev, iwmr->stag);
+	kfree(iwmr);
+
+	return 0;
+}
+
+/**
+ * irdma_hw_alloc_stag - cqp command to allocate stag
+ * @iwdev: iwarp device
+ * @iwmr: iwarp mr pointer
+ *
+ * Returns 0 on success, -ENOMEM if no CQP request is available or the
+ * command fails.
+ */
+static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
+			       struct irdma_mr *iwmr)
+{
+	struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_cqp_request *cqp_request;
+	struct irdma_allocate_stag_info *stag_info;
+	struct cqp_cmds_info *cqp_info;
+
+	cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	stag_info = &cqp_info->in.u.alloc_stag.info;
+	memset(stag_info, 0, sizeof(*stag_info));
+	stag_info->remote_access = true;
+	stag_info->total_len = iwmr->len;
+	stag_info->pd_id = iwpd->sc_pd.pd_id;
+	stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+	stag_info->page_size = PAGE_SIZE;
+
+	cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.alloc_stag.dev = &rf->sc_dev;
+	cqp_info->post_sq = 1;
+	cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG;
+
+	if (!irdma_handle_cqp_op(rf, cqp_request))
+		return 0;
+
+	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+		    "CQP-OP MR Reg fail");
+
+	return -ENOMEM;
+}
+
+/**
+ * irdma_alloc_mr - register stag for fast memory registration
+ * @pd: ibpd pointer
+ * @mr_type: memory type for stag registration
+ * @max_num_sg: max number of pages
+ *
+ * Returns the embedded ib_mr on success or an ERR_PTR() encoded errno.
+ */
+static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd,
+				    enum ib_mr_type mr_type,
+				    u32 max_num_sg)
+{
+	struct irdma_device *iwdev = to_iwdev(pd->device);
+	struct irdma_pd *iwpd = to_iwpd(pd);
+	struct irdma_pble_alloc *palloc;
+	struct irdma_mr *iwmr;
+	int err_code;
+	u32 stag;
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr)
+		return ERR_PTR(-ENOMEM);
+
+	stag = irdma_create_stag(iwdev);
+	if (!stag) {
+		err_code = -ENOMEM;
+		goto free_mr;
+	}
+
+	iwmr->type = IW_MEMREG_TYPE_MEM;
+	iwmr->stag = stag;
+	iwmr->page_cnt = max_num_sg;
+	iwmr->ibmr.device = pd->device;
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.lkey = stag;
+	iwmr->ibmr.rkey = stag;
+	iwmr->iwpbl.iwmr = iwmr;
+	palloc = &iwmr->iwpbl.pble_alloc;
+
+	/* reserve one PBLE per page the MR may later map */
+	if (irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
+			   true)) {
+		err_code = -ENOMEM;
+		goto free_stag;
+	}
+
+	err_code = irdma_hw_alloc_stag(iwdev, iwmr);
+	if (err_code)
+		goto free_pble;
+
+	iwmr->iwpbl.pbl_allocated = true;
+	irdma_add_pdusecount(iwpd);
+
+	return &iwmr->ibmr;
+
+free_pble:
+	irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+free_stag:
+	irdma_free_stag(iwdev, stag);
+free_mr:
+	kfree(iwmr);
+
+	return ERR_PTR(err_code);
+}
+
+/**
+ * irdma_set_page - populate pbl list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @addr: page dma address for pbl list
+ *
+ * Appends @addr to the level-1 PBL of the MR. Returns -ENOMEM once
+ * page_cnt entries have been stored, 0 otherwise.
+ */
+static int irdma_set_page(struct ib_mr *ibmr,
+			  u64 addr)
+{
+	struct irdma_mr *iwmr = to_iwmr(ibmr);
+	u64 *pbl_tbl;
+
+	if (unlikely(iwmr->npages == iwmr->page_cnt))
+		return -ENOMEM;
+
+	pbl_tbl = (u64 *)iwmr->iwpbl.pble_alloc.level1.addr;
+	pbl_tbl[iwmr->npages] = addr;
+	iwmr->npages++;
+
+	return 0;
+}
+
+/**
+ * irdma_map_mr_sg - map of sg list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @sg: scatter gather list for fmr
+ * @sg_nents: number of sg pages
+ * @sg_offset: passed through unchanged to ib_sg_to_pages()
+ *
+ * Restarts the MR page count and lets ib_sg_to_pages() feed every page to
+ * irdma_set_page(). Returns whatever ib_sg_to_pages() returns.
+ */
+static int irdma_map_mr_sg(struct ib_mr *ibmr,
+			   struct scatterlist *sg,
+			   int sg_nents,
+			   unsigned int *sg_offset)
+{
+	to_iwmr(ibmr)->npages = 0;
+
+	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, irdma_set_page);
+}
+
+/**
+ * irdma_drain_sq - drain the send queue
+ * @ibqp: ib qp pointer
+ *
+ * Blocks on the sq_drained completion when the SQ ring still has work.
+ */
+static void irdma_drain_sq(struct ib_qp *ibqp)
+{
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+	struct irdma_qp_uk *ukqp = &iwqp->sc_qp.qp_uk;
+
+	if (!IRDMA_RING_MORE_WORK(ukqp->sq_ring))
+		return;
+
+	wait_for_completion(&iwqp->sq_drained);
+}
+
+/**
+ * irdma_drain_rq - drain the receive queue
+ * @ibqp: ib qp pointer
+ *
+ * Blocks on the rq_drained completion when the RQ ring still has work.
+ */
+static void irdma_drain_rq(struct ib_qp *ibqp)
+{
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+	struct irdma_qp_uk *ukqp = &iwqp->sc_qp.qp_uk;
+
+	if (!IRDMA_RING_MORE_WORK(ukqp->rq_ring))
+		return;
+
+	wait_for_completion(&iwqp->rq_drained);
+}
+
+/**
+ * irdma_hwreg_mr - send cqp command for memory registration
+ * @iwdev: iwarp device
+ * @iwmr: iwarp mr pointer
+ * @access: access for MR
+ *
+ * Returns 0 on success, -ENOMEM if no CQP request is available or the
+ * command fails.
+ */
+static int irdma_hwreg_mr(struct irdma_device *iwdev,
+			  struct irdma_mr *iwmr,
+			  u16 access)
+{
+	struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_cqp_request *cqp_request;
+	struct irdma_reg_ns_stag_info *reg;
+	struct cqp_cmds_info *cqp_info;
+
+	cqp_request = irdma_get_cqp_request(&rf->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	reg = &cqp_info->in.u.mr_reg_non_shared.info;
+	memset(reg, 0, sizeof(*reg));
+	reg->va = (void *)(unsigned long)iwpbl->user_base;
+	reg->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+	reg->stag_key = (u8)iwmr->stag;
+	reg->total_len = iwmr->len;
+	reg->access_rights = access;
+	reg->pd_id = iwpd->sc_pd.pd_id;
+	reg->addr_type = IRDMA_ADDR_TYPE_VA_BASED;
+	reg->page_size = iwmr->page_size;
+
+	if (!iwpbl->pbl_allocated) {
+		/* no PBL: register the single physical address directly */
+		reg->reg_addr_pa = iwmr->pgaddrmem[0];
+	} else if (palloc->level == PBLE_LEVEL_1) {
+		reg->first_pm_pbl_index = palloc->level1.idx;
+		reg->chunk_size = 1;
+	} else {
+		reg->first_pm_pbl_index = palloc->level2.root.idx;
+		reg->chunk_size = 3;
+	}
+
+	cqp_info->cqp_cmd = IRDMA_OP_MR_REG_NON_SHARED;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.mr_reg_non_shared.dev = &rf->sc_dev;
+	cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
+
+	if (!irdma_handle_cqp_op(rf, cqp_request))
+		return 0;
+
+	irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+		    "CQP-OP MR Reg fail");
+
+	return -ENOMEM;
+}
+
+/**
+ * irdma_reg_user_mr - Register a user memory region
+ * @pd: ptr of pd
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ * @acc: access of mr
+ * @udata: user data
+ *
+ * The request copied from @udata selects, through its reg_type, whether
+ * the pinned region backs a QP, a CQ or a regular memory region.
+ *
+ * Returns the embedded ib_mr on success or an ERR_PTR() encoded errno.
+ */
+static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd,
+ u64 start,
+ u64 len,
+ u64 virt,
+ int acc,
+ struct ib_udata *udata)
+{
+ struct irdma_pd *iwpd = to_iwpd(pd);
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_ucontext *ucontext;
+ struct irdma_pble_alloc *palloc;
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
+ struct ib_umem *region;
+ struct irdma_mem_reg_req req;
+ u64 pbl_depth = 0;
+ u32 stag = 0;
+ u16 access;
+ u64 region_len;
+ bool use_pbles = false;
+ unsigned long flags;
+ /* returned unchanged when req.reg_type matches none of the cases below */
+ int err = -ENOSYS;
+ int ret, pg_shift;
+
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
+ if (len > IRDMA_MAX_MR_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ region = ib_umem_get(udata, start, len, acc, 0);
+ if (IS_ERR(region))
+ return (struct ib_mr *)region;
+
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ ib_umem_release(region);
+ return ERR_PTR(-EFAULT);
+ }
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr) {
+ ib_umem_release(region);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->region = region;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ ucontext = to_ucontext(pd->uobject->context);
+ iwmr->page_size = PAGE_SIZE;
+ iwmr->page_msk = PAGE_MASK;
+
+ /* page count: region length plus the offset of @start inside its
+  * first page, rounded up to whole pages
+  */
+ region_len = region->length + (start & (iwmr->page_size - 1));
+ pg_shift = ffs(iwmr->page_size) - 1;
+ pbl_depth = region_len >> pg_shift;
+ pbl_depth += (region_len & (iwmr->page_size - 1)) ? 1 : 0;
+ iwmr->len = region->length;
+ iwpbl->user_base = virt;
+ palloc = &iwpbl->pble_alloc;
+ iwmr->type = req.reg_type;
+ iwmr->page_cnt = (u32)pbl_depth;
+
+ switch (req.reg_type) {
+ case IW_MEMREG_TYPE_QP:
+ /* PBLEs are used once SQ and RQ together span more than two pages */
+ use_pbles = ((req.sq_pages + req.rq_pages) > 2);
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ if (err)
+ goto error;
+
+ /* queue the PBL on the ucontext's QP registered-memory list */
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+ break;
+ case IW_MEMREG_TYPE_CQ:
+ /* PBLEs are used once the CQ spans more than one page */
+ use_pbles = (req.cq_pages > 1);
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+ if (err)
+ goto error;
+
+ /* queue the PBL on the ucontext's CQ registered-memory list */
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ break;
+ case IW_MEMREG_TYPE_MEM:
+ use_pbles = (iwmr->page_cnt != 1);
+ access = IRDMA_ACCESS_FLAGS_LOCALREAD;
+
+ err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
+ if (err)
+ goto error;
+
+ if (use_pbles) {
+ /* a nonzero result releases the PBLEs again; presumably the
+  * pages turned out to be physically contiguous - TODO confirm
+  */
+ ret = irdma_check_mr_contiguous(palloc,
+ iwmr->page_size);
+ if (ret) {
+ irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ }
+ }
+
+ access |= irdma_get_user_access(acc);
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ err = irdma_hwreg_mr(iwdev, iwmr, access);
+ if (err) {
+ irdma_free_stag(iwdev, stag);
+ goto error;
+ }
+
+ break;
+ default:
+ goto error;
+ }
+
+ iwmr->type = req.reg_type;
+ /* only regular memory regions take a PD use count here */
+ if (req.reg_type == IW_MEMREG_TYPE_MEM)
+ irdma_add_pdusecount(iwpd);
+
+ return &iwmr->ibmr;
+
+error:
+ /* release PBLEs only if they are still marked as allocated */
+ if (palloc->level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
+ irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+ ib_umem_release(region);
+ kfree(iwmr);
+
+ return ERR_PTR(err);
+}
+
+/**
+ * irdma_reg_phys_mr - register kernel physical memory
+ * @pd: ibpd pointer
+ * @addr: physical address of memory to register
+ * @size: size of memory to register
+ * @acc: Access rights
+ * @iova_start: start of virtual address for physical buffers
+ *
+ * Registers a single-entry MR (page_cnt of 1, no PBL) whose only address
+ * is @addr.
+ *
+ * Returns the embedded ib_mr on success or an ERR_PTR() encoded errno.
+ */
+struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd,
+				u64 addr,
+				u64 size,
+				int acc,
+				u64 *iova_start)
+{
+	struct irdma_pd *iwpd = to_iwpd(pd);
+	struct irdma_device *iwdev = to_iwdev(pd->device);
+	struct irdma_pbl *iwpbl;
+	struct irdma_mr *iwmr;
+	u32 stag;
+	u16 access = IRDMA_ACCESS_FLAGS_LOCALREAD;
+	int ret;
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr)
+		return ERR_PTR(-ENOMEM);
+
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->type = IW_MEMREG_TYPE_MEM;
+	iwpbl->user_base = *iova_start;
+	stag = irdma_create_stag(iwdev);
+	if (!stag) {
+		ret = -ENOMEM;
+		goto err_free_mr;
+	}
+
+	access |= irdma_get_user_access(acc);
+	iwmr->stag = stag;
+	iwmr->ibmr.rkey = stag;
+	iwmr->ibmr.lkey = stag;
+	iwmr->page_cnt = 1;
+	iwmr->pgaddrmem[0] = addr;
+	iwmr->len = size;
+	/* NOTE(review): iwmr->page_size stays 0 from kzalloc() here, while
+	 * irdma_reg_user_mr() sets PAGE_SIZE and irdma_hwreg_mr() copies the
+	 * field into the stag info - confirm 0 is intended.
+	 */
+
+	/* irdma_hwreg_mr() returns an errno, so keep it in an int and hand
+	 * it back to the caller unmodified.
+	 */
+	ret = irdma_hwreg_mr(iwdev, iwmr, access);
+	if (ret)
+		goto err_free_stag;
+
+	irdma_add_pdusecount(iwpd);
+
+	return &iwmr->ibmr;
+
+err_free_stag:
+	irdma_free_stag(iwdev, stag);
+err_free_mr:
+	kfree(iwmr);
+
+	return ERR_PTR(ret);
+}
+
+/**
+ * irdma_get_dma_mr - register physical mem
+ * @pd: ptr of pd
+ * @acc: access for memory
+ *
+ * Thin wrapper that registers address 0 with size 0 and a zero iova
+ * through irdma_reg_phys_mr().
+ */
+static struct ib_mr *irdma_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	u64 iova = 0;
+	struct ib_mr *dma_mr;
+
+	dma_mr = irdma_reg_phys_mr(pd, 0, 0, acc, &iova);
+
+	return dma_mr;
+}
+
+/**
+ * irdma_del_memlist - Deleting pbl list entries for CQ/QP
+ * @iwmr: iwmr for IB's user page addresses
+ * @ucontext: ptr to user context
+ *
+ * Unlinks the MR's PBL from the CQ or QP registered-memory list under the
+ * matching list lock. Other MR types are left untouched.
+ */
+static void irdma_del_memlist(struct irdma_mr *iwmr,
+			      struct irdma_ucontext *ucontext)
+{
+	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+	spinlock_t *list_lock;
+	unsigned long flags;
+
+	/* choose the lock that guards the list this PBL may be on */
+	switch (iwmr->type) {
+	case IW_MEMREG_TYPE_CQ:
+		list_lock = &ucontext->cq_reg_mem_list_lock;
+		break;
+	case IW_MEMREG_TYPE_QP:
+		list_lock = &ucontext->qp_reg_mem_list_lock;
+		break;
+	default:
+		return;
+	}
+
+	spin_lock_irqsave(list_lock, flags);
+	if (iwpbl->on_list) {
+		iwpbl->on_list = false;
+		list_del(&iwpbl->list);
+	}
+	spin_unlock_irqrestore(list_lock, flags);
+}
+
+/**
+ * irdma_dereg_mr - deregister mr
+ * @ib_mr: mr ptr for dereg
+ *
+ * Regular memory regions are deallocated through a dealloc-stag CQP
+ * command; a command failure is only logged. QP/CQ registrations are just
+ * unlinked from their ucontext list. PBLEs, the umem and the MR structure
+ * are released in both cases.
+ *
+ * Returns 0, or -ENOMEM when no CQP request could be obtained (nothing is
+ * released in that case).
+ */
+static int irdma_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+	struct irdma_mr *iwmr = to_iwmr(ib_mr);
+	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+	struct ib_pd *ibpd = ib_mr->pd;
+	struct irdma_pd *iwpd = to_iwpd(ibpd);
+
+	if (iwmr->type == IW_MEMREG_TYPE_MEM) {
+		struct irdma_cqp_request *cqp_request;
+		struct irdma_dealloc_stag_info *info;
+		struct cqp_cmds_info *cqp_info;
+		u32 stag_idx;
+
+		cqp_request = irdma_get_cqp_request(&iwdev->rf->cqp, true);
+		if (!cqp_request)
+			return -ENOMEM;
+
+		cqp_info = &cqp_request->info;
+		info = &cqp_info->in.u.dealloc_stag.info;
+		memset(info, 0, sizeof(*info));
+		info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff;
+		info->stag_idx = RS_64_1(ib_mr->rkey, IRDMA_CQPSQ_STAG_IDX_S);
+		/* private copy so the log below needs no request memory */
+		stag_idx = info->stag_idx;
+		info->mr = true;
+		if (iwpbl->pbl_allocated)
+			info->dealloc_pbl = true;
+
+		cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+		cqp_info->post_sq = 1;
+		cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
+		cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+		if (irdma_handle_cqp_op(iwdev->rf, cqp_request))
+			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+				    "CQP-OP dealloc failed for stag_idx = 0x%x\n",
+				    stag_idx);
+		irdma_rem_pdusecount(iwpd, iwdev);
+		irdma_free_stag(iwdev, iwmr->stag);
+	} else if (iwmr->region) {
+		/* QP/CQ memory: only remove it from the ucontext list */
+		irdma_del_memlist(iwmr,
+				  to_ucontext(ibpd->uobject->context));
+	}
+
+	if (iwpbl->pbl_allocated)
+		irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+	if (iwmr->region)
+		ib_umem_release(iwmr->region);
+	kfree(iwmr);
+
+	return 0;
+}
+
+/**
+ * hw_rev_show - sysfs show handler printing the PCI revision in hex
+ * @dev: device whose attribute is read
+ * @attr: attribute being read (unused)
+ * @buf: output buffer
+ */
+static ssize_t hw_rev_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct irdma_ib_device *iwibdev;
+	u32 rev;
+
+	iwibdev = rdma_device_to_drv_device(dev, struct irdma_ib_device,
+					    ibdev);
+	rev = iwibdev->iwdev->rf->sc_dev.pci_rev;
+
+	return sprintf(buf, "%x\n", rev);
+}
+
+/**
+ * hca_type_show - sysfs show handler printing the fixed HCA type string
+ * @dev: device whose attribute is read (unused)
+ * @attr: attribute being read (unused)
+ * @buf: output buffer
+ */
+static ssize_t hca_type_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	ssize_t written = sprintf(buf, "IRDMA\n");
+
+	return written;
+}
+
+/**
+ * board_id_show - sysfs show handler printing the fixed board id string
+ * @dev: device whose attribute is read (unused)
+ * @attr: attribute being read (unused)
+ * @buf: output buffer
+ *
+ * The string is printed with a precision of 32 characters.
+ */
+static ssize_t board_id_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	ssize_t written = sprintf(buf, "%.*s\n", 32, "IRDMA Board ID");
+
+	return written;
+}
+
+/* read-only sysfs attributes served by the *_show handlers of the same name */
+static DEVICE_ATTR_RO(hw_rev);
+static DEVICE_ATTR_RO(hca_type);
+static DEVICE_ATTR_RO(board_id);
+
+/* NULL-terminated attribute list referenced by irdma_attr_group */
+static struct attribute *irdma_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
+
+/* attribute group bundling the device attributes listed above */
+static const struct attribute_group irdma_attr_group = {
+ .attrs = irdma_dev_attributes,
+};
+
+/**
+ * irdma_copy_sg_list - copy sg list for qp
+ * @sg_list: copied into sg_list
+ * @sgl: copy from sgl
+ * @num_sges: count of sg entries
+ *
+ * Copies at most IRDMA_MAX_WQ_FRAGMENT_COUNT entries; a zero or negative
+ * @num_sges copies nothing.
+ */
+static void irdma_copy_sg_list(struct irdma_sge *sg_list,
+			       struct ib_sge *sgl,
+			       int num_sges)
+{
+	/* signed index: comparing an unsigned index against a negative
+	 * @num_sges would convert the count to a huge unsigned value
+	 */
+	int i;
+
+	for (i = 0; i < num_sges && i < IRDMA_MAX_WQ_FRAGMENT_COUNT; i++) {
+		sg_list[i].tag_off = sgl[i].addr;
+		sg_list[i].len = sgl[i].length;
+		sg_list[i].stag = sgl[i].lkey;
+	}
+}
+
+/**
+ * irdma_post_send - kernel application wr
+ * @ibqp: qp ptr for wr
+ * @ib_wr: work request ptr
+ * @bad_wr: return of bad wr if err
+ *
+ * Builds and posts every request of the @ib_wr chain while holding the QP
+ * lock and stops at the first request that fails. On failure *@bad_wr is
+ * set to the failing request and -ENOMEM or -EINVAL is returned;
+ * otherwise 0 is returned.
+ */
+static int irdma_post_send(struct ib_qp *ibqp,
+			   const struct ib_send_wr *ib_wr,
+			   const struct ib_send_wr **bad_wr)
+{
+	struct irdma_qp *iwqp;
+	struct irdma_qp_uk *ukqp;
+	struct irdma_sc_dev *dev;
+	struct irdma_post_sq_info info;
+	enum irdma_status_code ret;
+	int err = 0;
+	unsigned long flags;
+	bool inv_stag;
+	struct irdma_ah *ah;
+	bool reflush = false;
+
+	iwqp = (struct irdma_qp *)ibqp;
+	ukqp = &iwqp->sc_qp.qp_uk;
+	dev = &iwqp->iwdev->rf->sc_dev;
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+	/* an earlier SQ flush has completed; a new flush is requested below */
+	if (iwqp->flush_issued && ukqp->sq_flush_complete)
+		reflush = true;
+
+	while (ib_wr) {
+		memset(&info, 0, sizeof(info));
+		inv_stag = false;
+		info.wr_id = (u64)(ib_wr->wr_id);
+		if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
+			info.signaled = true;
+		if (ib_wr->send_flags & IB_SEND_FENCE)
+			info.read_fence = true;
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND_WITH_IMM:
+			if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) {
+				info.imm_data_valid = true;
+				info.imm_data = ntohl(ib_wr->ex.imm_data);
+			} else {
+				err = -EINVAL;
+				break;
+			}
+			/* fall-through */
+		case IB_WR_SEND:
+			/* fall-through */
+		case IB_WR_SEND_WITH_INV:
+			if (ib_wr->opcode == IB_WR_SEND ||
+			    ib_wr->opcode == IB_WR_SEND_WITH_IMM) {
+				if (ib_wr->send_flags & IB_SEND_SOLICITED)
+					info.op_type = IRDMA_OP_TYPE_SEND_SOL;
+				else
+					info.op_type = IRDMA_OP_TYPE_SEND;
+			} else {
+				if (ib_wr->send_flags & IB_SEND_SOLICITED)
+					info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV;
+				else
+					info.op_type = IRDMA_OP_TYPE_SEND_INV;
+				info.stag_to_inv = ib_wr->ex.invalidate_rkey;
+			}
+
+			if (ib_wr->send_flags & IB_SEND_INLINE) {
+				/* inline data is taken from the first SGE only */
+				info.op.inline_send.data = (void *)(unsigned long)
+							   ib_wr->sg_list[0].addr;
+				info.op.inline_send.len = ib_wr->sg_list[0].length;
+				if (iwqp->ibqp.qp_type == IB_QPT_UD ||
+				    iwqp->ibqp.qp_type == IB_QPT_GSI) {
+					ah = to_iwah(ud_wr(ib_wr)->ah);
+					info.op.inline_send.ah_id = ah->sc_ah.ah_info.ah_idx;
+					info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey;
+					info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn;
+				}
+				ret = ukqp->qp_ops.iw_inline_send(ukqp, &info, false);
+			} else {
+				info.op.send.num_sges = ib_wr->num_sge;
+				info.op.send.sg_list = (struct irdma_sge *)
+						       ib_wr->sg_list;
+				if (iwqp->ibqp.qp_type == IB_QPT_UD ||
+				    iwqp->ibqp.qp_type == IB_QPT_GSI) {
+					ah = to_iwah(ud_wr(ib_wr)->ah);
+					info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
+					info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
+					info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
+				}
+				ret = ukqp->qp_ops.iw_send(ukqp, &info, false);
+			}
+
+			if (ret) {
+				if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED)
+					err = -ENOMEM;
+				else
+					err = -EINVAL;
+			}
+			break;
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) {
+				info.imm_data_valid = true;
+				info.imm_data = ntohl(ib_wr->ex.imm_data);
+			} else {
+				err = -EINVAL;
+				break;
+			}
+			/* fall-through */
+		case IB_WR_RDMA_WRITE:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED)
+				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL;
+			else
+				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
+
+			if (ib_wr->send_flags & IB_SEND_INLINE) {
+				/* go through unsigned long, as the inline send
+				 * path does, so the u64 address is not cast
+				 * straight to a narrower pointer on 32-bit
+				 */
+				info.op.inline_rdma_write.data = (void *)(unsigned long)
+								 ib_wr->sg_list[0].addr;
+				info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
+				info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+				info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+				ret = ukqp->qp_ops.iw_inline_rdma_write(ukqp, &info, false);
+			} else {
+				info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
+				info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+				info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+				info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+				ret = ukqp->qp_ops.iw_rdma_write(ukqp, &info, false);
+			}
+
+			if (ret) {
+				if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED)
+					err = -ENOMEM;
+				else
+					err = -EINVAL;
+			}
+			break;
+		case IB_WR_RDMA_READ_WITH_INV:
+			inv_stag = true;
+			/* fall-through*/
+		case IB_WR_RDMA_READ:
+			if (ib_wr->num_sge > dev->hw_attrs.max_hw_read_sges) {
+				err = -EINVAL;
+				break;
+			}
+			info.op_type = IRDMA_OP_TYPE_RDMA_READ;
+			info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+			info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+			info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
+			info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
+
+			ret = ukqp->qp_ops.iw_rdma_read(ukqp, &info, inv_stag,
+							false);
+			if (ret) {
+				if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED)
+					err = -ENOMEM;
+				else
+					err = -EINVAL;
+			}
+			break;
+		case IB_WR_LOCAL_INV:
+			info.op_type = IRDMA_OP_TYPE_INV_STAG;
+			info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
+			ret = ukqp->qp_ops.iw_stag_local_invalidate(ukqp, &info, true);
+			if (ret)
+				err = -ENOMEM;
+			break;
+		case IB_WR_REG_MR:
+		{
+			/* locals are named so they do not shadow the outer
+			 * 'info' and the 'flags' holding the saved IRQ state
+			 */
+			struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
+			int mr_access = reg_wr(ib_wr)->access;
+			struct irdma_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
+			struct irdma_fast_reg_stag_info fr_info = {};
+
+			fr_info.access_rights = IRDMA_ACCESS_FLAGS_LOCALREAD;
+			fr_info.access_rights |= irdma_get_user_access(mr_access);
+			fr_info.stag_key = reg_wr(ib_wr)->key & 0xff;
+			fr_info.stag_idx = reg_wr(ib_wr)->key >> 8;
+			fr_info.page_size = reg_wr(ib_wr)->mr->page_size;
+			fr_info.wr_id = ib_wr->wr_id;
+			fr_info.addr_type = IRDMA_ADDR_TYPE_VA_BASED;
+			fr_info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
+			fr_info.total_len = iwmr->ibmr.length;
+			fr_info.reg_addr_pa = *(u64 *)palloc->level1.addr;
+			fr_info.first_pm_pbl_index = palloc->level1.idx;
+			fr_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
+			fr_info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
+			if (iwmr->npages > IRDMA_MIN_PAGES_PER_FMR)
+				fr_info.chunk_size = 1;
+			ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp,
+								       &fr_info,
+								       true);
+			if (ret)
+				err = -ENOMEM;
+			break;
+		}
+		default:
+			err = -EINVAL;
+			irdma_debug(dev, IRDMA_DEBUG_ERR,
+				    " upost_send bad opcode = 0x%x\n",
+				    ib_wr->opcode);
+			break;
+		}
+
+		/* leave ib_wr pointing at the failing request for *bad_wr */
+		if (err)
+			break;
+		ib_wr = ib_wr->next;
+	}
+
+	if (!iwqp->flush_issued &&
+	    iwqp->hw_iwarp_state <= (u32)IRDMA_QP_STATE_RTS) {
+		ukqp->qp_ops.iw_qp_post_wr(ukqp);
+		spin_unlock_irqrestore(&iwqp->lock, flags);
+	} else if (reflush) {
+		struct irdma_qp_flush_info flush_info = {};
+		struct irdma_pci_f *rf = iwqp->iwdev->rf;
+
+		iwqp->sc_qp.flush_sq = false;
+		iwqp->sc_qp.term_flags = 0;
+		spin_unlock_irqrestore(&iwqp->lock, flags);
+		ukqp->sq_flush_complete = false;
+		flush_info.sq = true;
+		flush_info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
+		/* NOTE(review): minor code is set from the MAJOR constant -
+		 * confirm this is intended
+		 */
+		flush_info.sq_minor_code = IRDMA_FLUSH_MAJOR_ERR;
+		irdma_hw_flush_wqes(rf, &iwqp->sc_qp, &flush_info, false);
+	} else {
+		spin_unlock_irqrestore(&iwqp->lock, flags);
+	}
+	if (err)
+		*bad_wr = ib_wr;
+
+	return err;
+}
+
+/**
+ * irdma_post_recv - post receive wr for kernel application
+ * @ibqp: ib qp pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ *
+ * Posts every request of the @ib_wr chain under the QP lock and stops at
+ * the first failure. Returns 0, or -ENOMEM / -EINVAL with *@bad_wr set to
+ * the failing request.
+ */
+static int irdma_post_recv(struct ib_qp *ibqp,
+			   const struct ib_recv_wr *ib_wr,
+			   const struct ib_recv_wr **bad_wr)
+{
+	struct irdma_sge sg_list[IRDMA_MAX_WQ_FRAGMENT_COUNT];
+	struct irdma_post_rq_info post_recv = {};
+	struct irdma_qp *iwqp = (struct irdma_qp *)ibqp;
+	struct irdma_qp_uk *ukqp = &iwqp->sc_qp.qp_uk;
+	enum irdma_status_code ret = 0;
+	unsigned long flags;
+	bool reflush;
+	int err = 0;
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+	reflush = iwqp->flush_issued && ukqp->rq_flush_complete;
+
+	for (; ib_wr; ib_wr = ib_wr->next) {
+		post_recv.num_sges = ib_wr->num_sge;
+		post_recv.wr_id = ib_wr->wr_id;
+		irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
+		post_recv.sg_list = sg_list;
+		ret = ukqp->qp_ops.iw_post_receive(ukqp, &post_recv);
+		if (!ret)
+			continue;
+
+		irdma_debug(&iwqp->iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "post_recv err %d\n", ret);
+		err = (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) ?
+		      -ENOMEM : -EINVAL;
+		/* ib_wr stays on the failing request for *bad_wr */
+		break;
+	}
+
+	if (!reflush) {
+		spin_unlock_irqrestore(&iwqp->lock, flags);
+	} else {
+		struct irdma_qp_flush_info flush_info = { };
+		struct irdma_pci_f *rf = iwqp->iwdev->rf;
+
+		iwqp->sc_qp.flush_rq = false;
+		spin_unlock_irqrestore(&iwqp->lock, flags);
+		ukqp->rq_flush_complete = false;
+		flush_info.rq = true;
+		irdma_hw_flush_wqes(rf, &iwqp->sc_qp, &flush_info, false);
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+
+	return err;
+}
+
+/**
+ * irdma_poll_cq - poll cq for completion (kernel apps)
+ * @ibcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: wr of entry completed
+ *
+ * Translates up to @num_entries hardware completions into ib_wc entries
+ * while holding the CQ lock.
+ *
+ * Returns the number of entries filled in, or -1 when polling fails with
+ * an unexpected status before any entry was produced.
+ */
+static int irdma_poll_cq(struct ib_cq *ibcq,
+ int num_entries,
+ struct ib_wc *entry)
+{
+ struct irdma_cq *iwcq;
+ int cqe_count = 0;
+ struct irdma_cq_poll_info cq_poll_info;
+ enum irdma_status_code ret;
+ struct irdma_cq_uk *ukcq;
+ struct irdma_sc_qp *qp;
+ struct irdma_qp *iwqp;
+ unsigned long flags;
+
+ iwcq = (struct irdma_cq *)ibcq;
+ ukcq = &iwcq->sc_cq.cq_uk;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ while (cqe_count < num_entries) {
+ ret = ukcq->ops.iw_cq_poll_cmpl(ukcq, &cq_poll_info);
+ if (ret == IRDMA_ERR_Q_EMPTY) {
+ break;
+ } else if (ret == IRDMA_ERR_Q_DESTROYED) {
+ /* nothing is reported for this status; poll again */
+ continue;
+ } else if (ret) {
+ /* any other error ends the poll; -1 only if nothing was reaped */
+ if (!cqe_count)
+ cqe_count = -1;
+ break;
+ }
+
+ entry->wc_flags = 0;
+ entry->wr_id = cq_poll_info.wr_id;
+ if (cq_poll_info.error) {
+ if (cq_poll_info.comp_status == IRDMA_COMPL_STATUS_FLUSHED)
+ entry->status = IB_WC_WR_FLUSH_ERR;
+ else if (cq_poll_info.comp_status == IRDMA_COMPL_STATUS_INVALID_LEN)
+ entry->status = IB_WC_LOC_LEN_ERR;
+ else
+ entry->status = IB_WC_GENERAL_ERR;
+ /* major error code in the upper 16 bits, minor in the lower */
+ entry->vendor_err = cq_poll_info.major_err << 16 |
+ cq_poll_info.minor_err;
+ } else {
+ entry->status = IB_WC_SUCCESS;
+ if (cq_poll_info.imm_valid) {
+ entry->ex.imm_data =
+ htonl(cq_poll_info.imm_data);
+ entry->wc_flags |= IB_WC_WITH_IMM;
+ }
+ if (cq_poll_info.ud_smac_valid) {
+ ether_addr_copy(entry->smac, cq_poll_info.ud_smac);
+ entry->wc_flags |= IB_WC_WITH_SMAC;
+ }
+
+ if (cq_poll_info.ud_vlan_valid) {
+ entry->vlan_id = cq_poll_info.ud_vlan;
+ entry->wc_flags |= IB_WC_WITH_VLAN;
+ /* sl is taken from bits 13-15 of the vlan value */
+ entry->sl = (cq_poll_info.ud_vlan >> 13) & 0x07;
+ }
+ }
+
+ switch (cq_poll_info.op_type) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
+ case IRDMA_OP_TYPE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ break;
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND_SOL:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ case IRDMA_OP_TYPE_SEND:
+ entry->opcode = IB_WC_SEND;
+ if (cq_poll_info.stag_invalid_set)
+ entry->ex.invalidate_rkey = cq_poll_info.inv_stag;
+ break;
+ case IRDMA_OP_TYPE_REC:
+ entry->opcode = IB_WC_RECV;
+ break;
+ case IRDMA_OP_TYPE_REC_IMM:
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ /* op types not listed above are reported as a plain receive */
+ entry->opcode = IB_WC_RECV;
+ break;
+ }
+
+ qp = (struct irdma_sc_qp *)cq_poll_info.qp_handle;
+ entry->qp = (struct ib_qp *)qp->back_qp;
+ if (qp->qp_type == IRDMA_QP_TYPE_ROCE_UD) {
+ entry->src_qp = cq_poll_info.ud_src_qpn;
+ entry->wc_flags |=
+ (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
+ entry->network_hdr_type =
+ cq_poll_info.ipv4 ?
+ RDMA_NETWORK_IPV4 : RDMA_NETWORK_IPV6;
+ } else {
+ entry->src_qp = cq_poll_info.qp_id;
+ }
+ iwqp = (struct irdma_qp *)qp->back_qp;
+ /* past RTS: signal the completions irdma_drain_sq()/irdma_drain_rq()
+  * wait on once the corresponding ring has no more work
+  */
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_RTS) {
+ if (!IRDMA_RING_MORE_WORK(qp->qp_uk.sq_ring))
+ complete(&iwqp->sq_drained);
+ if (!IRDMA_RING_MORE_WORK(qp->qp_uk.rq_ring))
+ complete(&iwqp->rq_drained);
+ }
+ entry->byte_len = cq_poll_info.bytes_xfered;
+ entry++;
+ cqe_count++;
+ }
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ return cqe_count;
+}
+
+/**
+ * irdma_req_notify_cq - arm cq kernel application
+ * @ibcq: cq to arm
+ * @notify_flags: notification flags
+ *
+ * Requests a solicited-only notification when @notify_flags is exactly
+ * IB_CQ_SOLICITED and a notification for any completion otherwise.
+ * Always returns 0.
+ */
+static int irdma_req_notify_cq(struct ib_cq *ibcq,
+			       enum ib_cq_notify_flags notify_flags)
+{
+	struct irdma_cq *iwcq = (struct irdma_cq *)ibcq;
+	struct irdma_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
+	enum irdma_cmpl_notify cq_notify;
+	unsigned long flags;
+
+	cq_notify = (notify_flags == IB_CQ_SOLICITED) ?
+		    IRDMA_CQ_COMPL_SOLICITED : IRDMA_CQ_COMPL_EVENT;
+
+	spin_lock_irqsave(&iwcq->lock, flags);
+	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+	spin_unlock_irqrestore(&iwcq->lock, flags);
+
+	return 0;
+}
+
+/**
+ * irdma_port_immutable - return port's immutable data
+ * @ibdev: ib dev struct
+ * @port_num: port number
+ * @immutable: immutable data for the port return
+ *
+ * Returns 0 on success or the error reported by ib_query_port(). The
+ * capability flags are written before the port query is attempted.
+ */
+static int irdma_port_immutable(struct ib_device *ibdev,
+				u8 port_num,
+				struct ib_port_immutable *immutable)
+{
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	struct ib_port_attr attr;
+	int err;
+
+	if (!iwdev->roce_mode) {
+		immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+	} else {
+		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+	}
+
+	err = ib_query_port(ibdev, port_num, &attr);
+	if (!err) {
+		immutable->pkey_tbl_len = attr.pkey_tbl_len;
+		immutable->gid_tbl_len = attr.gid_tbl_len;
+	}
+
+	return err;
+}
+
+/*
+ * Counter names indexed by hardware statistic index. The 32-bit counters
+ * use their index directly; the 64-bit counters follow them, at their own
+ * index offset by IRDMA_HW_STAT_INDEX_MAX_32.
+ */
+static const char * const irdma_hw_stat_names[] = {
+ /* 32bit names */
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors",
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled",
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored",
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent",
+
+ /* 64bit names */
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4InOctets",
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4InPkts",
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4InReasmRqd",
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4InMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4InMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4OutOctets",
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4OutPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4OutSegRqd",
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4OutMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip4OutMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6InOctets",
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6InPkts",
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6InReasmRqd",
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6InMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6InMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6OutOctets",
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6OutPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6OutSegRqd",
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6OutMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "ip6OutMcastPkts",
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "tcpInSegs",
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "tcpOutSegs",
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwInRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwInRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwInRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwOutRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwOutRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwOutRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwRdmaBnd",
+ [IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "iwRdmaInv",
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "RxUDP",
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "TxUDP",
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] =
+ "RxECNMrkd",
+};
+
+/**
+ * irdma_get_dev_fw_str - format the firmware version as "major.minor"
+ * @dev: device pointer from stack
+ * @str: output buffer; at most IB_FW_VERSION_NAME_MAX bytes are written
+ */
+static void irdma_get_dev_fw_str(struct ib_device *dev,
+				 char *str)
+{
+	struct irdma_sc_dev *sc_dev = &to_iwdev(dev)->rf->sc_dev;
+
+	snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u",
+		 FW_MAJOR_VER(sc_dev), FW_MINOR_VER(sc_dev));
+}
+
+/**
+ * irdma_alloc_hw_stats - Allocate a hw stats structure
+ * @ibdev: device pointer from stack
+ * @port_num: port number
+ *
+ * The structure exposes every name in irdma_hw_stat_names, i.e. the 32-bit
+ * counters followed by the 64-bit ones.
+ */
+static struct rdma_hw_stats *irdma_alloc_hw_stats(struct ib_device *ibdev,
+						  u8 port_num)
+{
+	struct irdma_sc_dev *dev = &to_iwdev(ibdev)->rf->sc_dev;
+	int num_counters = IRDMA_HW_STAT_INDEX_MAX_32 +
+			   IRDMA_HW_STAT_INDEX_MAX_64;
+	unsigned long lifespan;
+
+	/* the name table must cover exactly the counters we advertise */
+	BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_names) !=
+		     (IRDMA_HW_STAT_INDEX_MAX_32 + IRDMA_HW_STAT_INDEX_MAX_64));
+
+	/*
+	 * VFs only update once per second; PFs keep the default update
+	 * lifespan.
+	 */
+	if (dev->is_pf)
+		lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
+	else
+		lifespan = 1000;
+
+	return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters,
+					  lifespan);
+}
+
+/**
+ * irdma_get_hw_stats - Populates the rdma_hw_stats structure
+ * @ibdev: device pointer from stack
+ * @stats: stats pointer from stack
+ * @port_num: port number
+ * @index: which hw counter the stack is requesting we update
+ *
+ * Every counter is refreshed regardless of @index.
+ *
+ * Return: the number of counters in @stats.
+ */
+static int irdma_get_hw_stats(struct ib_device *ibdev,
+			      struct rdma_hw_stats *stats,
+			      u8 port_num,
+			      int index)
+{
+	struct irdma_device *iwdev = to_iwdev(ibdev);
+	struct irdma_dev_hw_stats *src = &iwdev->vsi.pestat->hw_stats;
+
+	/* issue the gather-stats CQP command, then copy the whole block out */
+	irdma_cqp_gather_stats_cmd(&iwdev->rf->sc_dev, iwdev->vsi.pestat,
+				   true);
+	/* NOTE(review): assumes *src is no larger than stats->value[] - confirm */
+	memcpy(stats->value, src, sizeof(*src));
+
+	return stats->num_counters;
+}
+
+/**
+ * irdma_query_gid - Query port GID
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: Entry index
+ * @gid: Global ID
+ *
+ * The GID is the netdev MAC address in the leading bytes of @gid, with the
+ * remainder zeroed. @port and @index are not consulted.
+ */
+static int irdma_query_gid(struct ib_device *ibdev,
+			   u8 port,
+			   int index,
+			   union ib_gid *gid)
+{
+	struct net_device *netdev = to_iwdev(ibdev)->netdev;
+
+	memset(gid->raw, 0, sizeof(gid->raw));
+	ether_addr_copy(gid->raw, netdev->dev_addr);
+
+	return 0;
+}
+
+/**
+ * irdma_modify_port - Modify port properties
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @port_modify_mask: mask for port modifications
+ * @props: port properties
+ *
+ * No port property is changed; the request is accepted and 0 is returned.
+ */
+static int irdma_modify_port(struct ib_device *ibdev,
+ u8 port,
+ int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ return 0;
+}
+
+/**
+ * irdma_query_gid_roce - Query port GID for Roce
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: Entry index
+ * @gid: Global ID
+ *
+ * Thin wrapper around rdma_query_gid(): an -EAGAIN result is converted
+ * into success with @gid set to the zero GID; any other result is passed
+ * through unchanged.
+ */
+static int irdma_query_gid_roce(struct ib_device *ibdev,
+				u8 port,
+				int index,
+				union ib_gid *gid)
+{
+	int ret = rdma_query_gid(ibdev, port, index, gid);
+
+	if (ret != -EAGAIN)
+		return ret;
+
+	memcpy(gid, &zgid, sizeof(*gid));
+
+	return 0;
+}
+
+/**
+ * mcast_list_add - Add a new mcast item to list
+ * @rf: RDMA PCI function
+ * @new_elem: pointer to element to add
+ *
+ * Inserts @new_elem at the head of rf->mc_qht_list. No locking is done
+ * here; the callers in this file hold rf->qh_list_lock around the call.
+ */
+static void mcast_list_add(struct irdma_pci_f *rf,
+ struct mc_table_list *new_elem)
+{
+ list_add(&new_elem->list, &rf->mc_qht_list.list);
+}
+
+/**
+ * mcast_list_del - Remove an mcast item from list
+ * @mc_qht_elem: pointer to the element to unlink; NULL is ignored
+ *
+ * Only unlinks the element; it does not free it and takes no lock.
+ */
+static void mcast_list_del(struct mc_table_list *mc_qht_elem)
+{
+ if (mc_qht_elem)
+ list_del(&mc_qht_elem->list);
+}
+
+/**
+ * mcast_list_lookup_ip - Search mcast list for address
+ * @rf: RDMA PCI function
+ * @ip_mcast: pointer to mcast IP address
+ *
+ * Compares @ip_mcast against the full dest_ip array of every entry on
+ * rf->mc_qht_list. No lock is taken here.
+ *
+ * Return: the first matching entry, or NULL when there is none.
+ */
+static struct mc_table_list *mcast_list_lookup_ip(struct irdma_pci_f *rf,
+						  u32 *ip_mcast)
+{
+	struct mc_table_list *mc_qht_el;
+
+	/* read-only walk: the _safe iterator is not needed */
+	list_for_each_entry(mc_qht_el, &rf->mc_qht_list.list, list) {
+		if (!memcmp(mc_qht_el->mc_info.dest_ip, ip_mcast,
+			    sizeof(mc_qht_el->mc_info.dest_ip)))
+			return mc_qht_el;
+	}
+
+	return NULL;
+}
+
+/**
+ * irdma_mcast_cqp_op - perform a mcast cqp operation
+ * @iwdev: device
+ * @mc_grp_ctx: mcast group info
+ * @op: operation (IRDMA_OP_MC_CREATE, IRDMA_OP_MC_MODIFY or
+ *      IRDMA_OP_MC_DESTROY)
+ *
+ * Copies @mc_grp_ctx into a CQP request and posts it.
+ *
+ * Return: 0 on success, -ENOMEM when no CQP request is available or when
+ * the CQP operation itself fails.
+ */
+static int irdma_mcast_cqp_op(struct irdma_device *iwdev,
+			      struct irdma_mcast_grp_info *mc_grp_ctx,
+			      u8 op)
+{
+	struct cqp_cmds_info *cqp_info;
+	struct irdma_cqp_request *cqp_request;
+	enum irdma_status_code status;
+
+	cqp_request = irdma_get_cqp_request(&iwdev->rf->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_request->info.in.u.mc_create.info = *mc_grp_ctx;
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = op;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.mc_create.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp;
+	status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+	if (status) {
+		/* name the op that really failed; DESTROY is also issued */
+		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "CQP-OP_%s failed\n",
+			    (op == IRDMA_OP_MC_MODIFY) ? "MODIFY" :
+			    (op == IRDMA_OP_MC_DESTROY) ? "DESTROY" : "CREATE");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_mcast_mac - Get the multicast MAC for an IP address
+ * @ip_addr: IPv4 or IPv6 address as host-order 32-bit words
+ * @mac: pointer to result MAC address (ETH_ALEN bytes)
+ * @ipv4: flag indicating IPv4 or IPv6
+ *
+ * IPv4: 01:00:5e followed by the low 23 bits of ip_addr[0].
+ * IPv6: 33:33 followed by the 32 bits of ip_addr[0].
+ *
+ * The bytes are extracted with shifts rather than through a byte pointer,
+ * so the result does not depend on CPU endianness.
+ */
+void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4)
+{
+	u32 word = ip_addr[0];
+
+	if (ipv4) {
+		unsigned char mac4[ETH_ALEN] = {0x01, 0x00, 0x5E, 0x00,
+						0x00, 0x00};
+
+		mac4[3] = (word >> 16) & 0x7F;
+		mac4[4] = (word >> 8) & 0xFF;
+		mac4[5] = word & 0xFF;
+		ether_addr_copy(mac, mac4);
+	} else {
+		unsigned char mac6[ETH_ALEN] = {0x33, 0x33, 0x00, 0x00,
+						0x00, 0x00};
+
+		/*
+		 * NOTE(review): this uses word 0 of the address, as the
+		 * original did - confirm that word 0 holds the low-order
+		 * 32 bits of the IPv6 group address.
+		 */
+		mac6[2] = (word >> 24) & 0xFF;
+		mac6[3] = (word >> 16) & 0xFF;
+		mac6[4] = (word >> 8) & 0xFF;
+		mac6[5] = word & 0xFF;
+		ether_addr_copy(mac, mac6);
+	}
+}
+
+/**
+ * irdma_attach_mcast - attach a qp to a multicast group
+ * @ibqp: ptr to qp
+ * @ibgid: pointer to global ID
+ * @lid: local ID
+ *
+ * Looks up the group entry for the address derived from @ibgid. When none
+ * exists a new entry is built (group number, context DMA memory, VLAN and
+ * destination MAC) and linked on rf->mc_qht_list. The QP is then added to
+ * the group and a CQP MC_CREATE (first member) or MC_MODIFY (additional
+ * member) is issued. If the membership count did not change, nothing is
+ * sent to hardware.
+ *
+ * returns error status
+ */
+static int irdma_attach_mcast(struct ib_qp *ibqp,
+			      union ib_gid *ibgid,
+			      u16 lid)
+{
+	struct irdma_qp *iwqp = to_iwqp(ibqp);
+	struct irdma_device *iwdev = iwqp->iwdev;
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct mc_table_list *mc_qht_elem;
+	struct irdma_mcast_grp_ctx_entry_info mcg_info = {};
+	enum irdma_status_code status;
+	bool free_elem = false;
+	unsigned long flags;
+	u32 ip_addr[4] = {};
+	u32 mgn;
+	u32 no_mgs;
+	int ret = 0;
+	bool ipv4;
+	u16 vlan_id;
+	union {
+		struct sockaddr saddr;
+		struct sockaddr_in saddr_in;
+		struct sockaddr_in6 saddr_in6;
+	} sgid_addr;
+	unsigned char dmac[ETH_ALEN];
+
+	rdma_gid2ip(&sgid_addr.saddr, ibgid);
+	if (rdma_gid_attr_network_type(ibqp->av_sgid_attr) == RDMA_NETWORK_IPV6) {
+		irdma_copy_ip_ntohl(ip_addr,
+				    sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+		irdma_netdev_vlan_ipv6(ip_addr, &vlan_id, NULL);
+		ipv4 = false;
+		irdma_dev_info(&rf->sc_dev,
+			       "qp_id=%d, IP6address=%pI6\n",
+			       ibqp->qp_num, ip_addr);
+		irdma_mcast_mac(ip_addr, dmac, false);
+	} else {
+		ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+		ipv4 = true;
+		vlan_id = irdma_get_vlan_ipv4(ip_addr);
+		irdma_mcast_mac(ip_addr, dmac, true);
+		irdma_dev_info(&rf->sc_dev,
+			       "qp_id=%d, IP4address=%pI4, MAC=%pM\n",
+			       ibqp->qp_num, ip_addr, dmac);
+	}
+
+	spin_lock_irqsave(&rf->qh_list_lock, flags);
+	mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
+	if (!mc_qht_elem) {
+		/*
+		 * Drop the lock for the sleeping allocations below.
+		 * NOTE(review): the list is not searched again after the
+		 * lock is retaken, so two concurrent attaches to a new
+		 * group could each add an entry - confirm whether callers
+		 * serialize this.
+		 */
+		spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+		mc_qht_elem = kzalloc(sizeof(*mc_qht_elem), GFP_KERNEL);
+		if (!mc_qht_elem)
+			return -ENOMEM;
+
+		mc_qht_elem->mc_info.ipv4_valid = ipv4;
+		memcpy(mc_qht_elem->mc_info.dest_ip, ip_addr,
+		       sizeof(mc_qht_elem->mc_info.dest_ip));
+		ret = irdma_alloc_rsrc(rf, rf->allocated_mcgs,
+				       rf->max_mcg,
+				       &mgn, &rf->next_mcg);
+		if (ret) {
+			kfree(mc_qht_elem);
+			return -ENOMEM;
+		}
+
+		mc_qht_elem->mc_info.mgn = mgn;
+		status = irdma_allocate_dma_mem(&rf->hw,
+						&mc_qht_elem->mc_grp_ctx.dma_mem_mc,
+						sizeof(u64) * IRDMA_MAX_MGS_PER_CTX,
+						4096);
+		if (status) {
+			irdma_free_rsrc(rf, rf->allocated_mcgs, mgn);
+			kfree(mc_qht_elem);
+			return -ENOMEM;
+		}
+
+		mc_qht_elem->mc_grp_ctx.mg_id = (u16)mgn;
+		memcpy(mc_qht_elem->mc_grp_ctx.dest_ip_addr, ip_addr,
+		       sizeof(mc_qht_elem->mc_grp_ctx.dest_ip_addr));
+		mc_qht_elem->mc_grp_ctx.ipv4_valid = ipv4;
+		mc_qht_elem->mc_grp_ctx.vlan_id = vlan_id;
+		if (vlan_id != IRDMA_NO_VLAN)
+			mc_qht_elem->mc_grp_ctx.vlan_valid = true;
+		mc_qht_elem->mc_grp_ctx.hmc_fcn_id = iwdev->vsi.fcn_id;
+		ether_addr_copy(mc_qht_elem->mc_grp_ctx.dest_mac_addr, dmac);
+
+		spin_lock_irqsave(&rf->qh_list_lock, flags);
+		mcast_list_add(rf, mc_qht_elem);
+	} else {
+		if (mc_qht_elem->mc_grp_ctx.no_of_mgs == IRDMA_MAX_MGS_PER_CTX) {
+			spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+			return -ENOMEM;
+		}
+	}
+
+	/* qh_list_lock is held on both paths here */
+	mcg_info.qp_id = iwqp->ibqp.qp_num;
+	no_mgs = mc_qht_elem->mc_grp_ctx.no_of_mgs;
+	rf->sc_dev.iw_uda_ops->mcast_grp_add(&mc_qht_elem->mc_grp_ctx,
+					     &mcg_info);
+	spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+
+	/* Only if there is a change do we need to modify or create */
+	if (!no_mgs) {
+		ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+					 IRDMA_OP_MC_CREATE);
+	} else if (no_mgs != mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+		ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+					 IRDMA_OP_MC_MODIFY);
+	} else {
+		return 0;
+	}
+
+	if (ret)
+		goto error;
+
+	atomic_inc(&iwqp->mcast_ref_cnt);
+	return 0;
+
+error:
+	/*
+	 * Undo the membership under qh_list_lock, like every other update
+	 * of the group context and list; free only after the lock is
+	 * released.
+	 */
+	spin_lock_irqsave(&rf->qh_list_lock, flags);
+	rf->sc_dev.iw_uda_ops->mcast_grp_del(&mc_qht_elem->mc_grp_ctx,
+					     &mcg_info);
+	if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+		mcast_list_del(mc_qht_elem);
+		free_elem = true;
+	}
+	spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+
+	if (free_elem) {
+		irdma_free_dma_mem(&rf->hw,
+				   &mc_qht_elem->mc_grp_ctx.dma_mem_mc);
+		irdma_free_rsrc(rf, rf->allocated_mcgs,
+				mc_qht_elem->mc_grp_ctx.mg_id);
+		kfree(mc_qht_elem);
+	}
+
+	return ret;
+}
+
+/**
+ * irdma_detach_mcast - detach a qp from a multicast group
+ * @ibqp: ptr to qp
+ * @ibgid: pointer to global ID
+ * @lid: local ID
+ *
+ * Removes the QP from the group matching the address derived from @ibgid.
+ * When the group becomes empty it is unlinked, destroyed through the CQP
+ * and its resources are freed; otherwise a CQP MC_MODIFY is issued. A group
+ * that is not found is treated as already removed.
+ *
+ * returns error status
+ */
+static int irdma_detach_mcast(struct ib_qp *ibqp,
+ union ib_gid *ibgid,
+ u16 lid)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ u32 ip_addr[4] = {};
+ struct mc_table_list *mc_qht_elem;
+ struct irdma_mcast_grp_ctx_entry_info mcg_info = {};
+ int ret;
+ unsigned long flags;
+ union {
+ struct sockaddr saddr;
+ struct sockaddr_in saddr_in;
+ struct sockaddr_in6 saddr_in6;
+ } sgid_addr;
+
+ /* derive the lookup key the same way irdma_attach_mcast() does */
+ rdma_gid2ip(&sgid_addr.saddr, ibgid);
+ if (rdma_gid_attr_network_type(ibqp->av_sgid_attr) == RDMA_NETWORK_IPV6) {
+ irdma_copy_ip_ntohl(ip_addr,
+ sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ irdma_dev_info(&rf->sc_dev,
+ "qp_id=%d, IP6address=%pI6\n",
+ ibqp->qp_num, ip_addr);
+ } else {
+ ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+ irdma_dev_info(&rf->sc_dev,
+ "qp_id=%d, IP4address=%pI4\n",
+ ibqp->qp_num, ip_addr);
+ }
+
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
+ if (!mc_qht_elem) {
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ irdma_pr_info("address not found MCG\n");
+ return 0; /* OK to remove group already removed */
+ }
+
+ mcg_info.qp_id = iwqp->ibqp.qp_num;
+ rf->sc_dev.iw_uda_ops->mcast_grp_del(&mc_qht_elem->mc_grp_ctx,
+ &mcg_info);
+ if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+ /* last member: unlink under the lock, destroy after dropping it */
+ mcast_list_del(mc_qht_elem);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_DESTROY);
+ if (ret) {
+ irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+ "failed MC_DESTROY MCG\n");
+ /* destroy failed: put the (now empty) entry back on the list */
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mcast_list_add(rf, mc_qht_elem);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ return -EAGAIN;
+ }
+
+ irdma_free_dma_mem(&rf->hw,
+ &mc_qht_elem->mc_grp_ctx.dma_mem_mc);
+ irdma_free_rsrc(rf, rf->allocated_mcgs,
+ mc_qht_elem->mc_grp_ctx.mg_id);
+ kfree(mc_qht_elem);
+ } else {
+ /*
+ * NOTE(review): mc_qht_elem stays on the list and is used below
+ * without the lock - confirm that a concurrent detach cannot free it.
+ */
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_MODIFY);
+ if (ret) {
+ irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+ "failed Modify MCG\n");
+ return ret;
+ }
+ }
+ /* reached only when the CQP operation succeeded */
+ atomic_dec(&iwqp->mcast_ref_cnt);
+
+ return 0;
+}
+
+/**
+ * irdma_create_ah - create address handle
+ * @ibpd: ptr to protection domain
+ * @attr: address handle attributes
+ * @flags: creation flags; RDMA_CREATE_AH_SLEEPABLE selects the sleeping
+ *         CQP path, otherwise completion is polled
+ * @udata: user data, or NULL for a kernel caller
+ *
+ * Reserves an AH index, fills the AH info from @attr and the source GID
+ * attribute (addresses, VLAN, destination MAC / ARP index) and issues the
+ * CQP AH_CREATE operation.
+ *
+ * returns a pointer to an address handle, or an ERR_PTR() on failure
+ */
+static struct ib_ah *irdma_create_ah(struct ib_pd *ibpd,
+				     struct rdma_ah_attr *attr,
+				     u32 flags,
+				     struct ib_udata *udata)
+{
+	struct irdma_pd *pd = to_iwpd(ibpd);
+	struct irdma_ah *ah;
+	const struct ib_gid_attr *sgid_attr;
+	struct irdma_device *iwdev = to_iwdev(ibpd->device);
+	struct irdma_pci_f *rf = iwdev->rf;
+	struct irdma_sc_ah *sc_ah;
+	u32 ah_id = 0;
+	struct irdma_ah_info *ah_info;
+	/* zeroed so no uninitialised stack bytes are copied to user space */
+	struct irdma_create_ah_resp uresp = {};
+	union {
+		struct sockaddr saddr;
+		struct sockaddr_in saddr_in;
+		struct sockaddr_in6 saddr_in6;
+	} sgid_addr = {}, dgid_addr = {};
+	int err;
+	u8 dmac[ETH_ALEN];
+
+	err = irdma_alloc_rsrc(rf, rf->allocated_ahs,
+			       rf->max_ah, &ah_id, &rf->next_ah);
+	if (err)
+		return ERR_PTR(err);
+
+	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+	if (!ah) {
+		irdma_free_rsrc(rf, rf->allocated_ahs, ah_id);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ah->pd = pd;
+	sc_ah = &ah->sc_ah;
+	sc_ah->ah_info.ah_idx = ah_id;
+	sc_ah->ah_info.vsi = &iwdev->vsi;
+	iwdev->rf->sc_dev.iw_uda_ops->init_ah(&rf->sc_dev, sc_ah);
+	ah->sgid_index = attr->grh.sgid_index;
+	sgid_attr = attr->grh.sgid_attr;
+	memcpy(&ah->dgid, &attr->grh.dgid, sizeof(ah->dgid));
+	rdma_gid2ip(&sgid_addr.saddr, &sgid_attr->gid);
+	rdma_gid2ip(&dgid_addr.saddr, &attr->grh.dgid);
+	ah->av.attrs = *attr;
+	ah->av.net_type = rdma_gid_attr_network_type(sgid_attr);
+	/*
+	 * Copy the whole union: assigning only .saddr would stop after
+	 * sizeof(struct sockaddr) and cut an IPv6 address short.
+	 */
+	memcpy(&ah->av.sgid_addr, &sgid_addr, sizeof(ah->av.sgid_addr));
+	memcpy(&ah->av.dgid_addr, &dgid_addr, sizeof(ah->av.dgid_addr));
+	ah_info = &sc_ah->ah_info;
+	ah_info->ah = sc_ah;
+	ah_info->ah_idx = ah_id;
+	ah_info->pd_idx = pd->sc_pd.pd_id;
+	ether_addr_copy(ah_info->mac_addr, iwdev->netdev->dev_addr);
+	if (attr->ah_flags & IB_AH_GRH) {
+		ah_info->flow_label = attr->grh.flow_label;
+		ah_info->hop_ttl = attr->grh.hop_limit;
+		ah_info->tc_tos = attr->grh.traffic_class;
+	}
+
+	/* start from the caller's dmac; multicast destinations override it */
+	ether_addr_copy(dmac, attr->roce.dmac);
+	if (rdma_gid_attr_network_type(sgid_attr) == RDMA_NETWORK_IPV4) {
+		ah_info->ipv4_valid = true;
+		ah_info->dest_ip_addr[0] =
+			ntohl(dgid_addr.saddr_in.sin_addr.s_addr);
+		ah_info->src_ip_addr[0] =
+			ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+		ah_info->do_lpbk = irdma_ipv4_is_lpb(ah_info->src_ip_addr[0],
+						     ah_info->dest_ip_addr[0]);
+		if (ipv4_is_multicast(dgid_addr.saddr_in.sin_addr.s_addr))
+			irdma_mcast_mac(ah_info->dest_ip_addr, dmac, true);
+	} else {
+		irdma_copy_ip_ntohl(ah_info->dest_ip_addr,
+				    dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+		irdma_copy_ip_ntohl(ah_info->src_ip_addr,
+				    sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+		ah_info->do_lpbk = irdma_ipv6_is_lpb(ah_info->src_ip_addr,
+						     ah_info->dest_ip_addr);
+		if (rdma_is_multicast_addr(&dgid_addr.saddr_in6.sin6_addr))
+			irdma_mcast_mac(ah_info->dest_ip_addr, dmac, false);
+	}
+	if (sgid_attr->ndev && is_vlan_dev(sgid_attr->ndev))
+		ah_info->vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev);
+	else
+		ah_info->vlan_tag = IRDMA_NO_VLAN;
+
+	ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr,
+					      ah_info->ipv4_valid, dmac);
+
+	if (ah_info->dst_arpindex == -1) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (ah_info->vlan_tag != 0xFFFF)
+		ah_info->insert_vlan_tag = true;
+
+	err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE,
+			      flags & RDMA_CREATE_AH_SLEEPABLE,
+			      irdma_gsi_ud_qp_ah_cb, sc_ah);
+	if (err) {
+		irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "CQP-OP Create AH fail");
+		goto error;
+	}
+
+	if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
+		int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD;
+
+		/* cannot sleep: poll the CCQ until the AH is marked valid */
+		do {
+			irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+			mdelay(1);
+		} while (!sc_ah->ah_info.ah_valid && --cnt);
+
+		if (!cnt) {
+			irdma_debug(&rf->sc_dev, IRDMA_DEBUG_ERR,
+				    "CQP create AH timed out");
+			err = -ETIMEDOUT;
+			goto error;
+		}
+	}
+
+	irdma_add_pdusecount(pd);
+	if (udata) {
+		uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
+		/*
+		 * NOTE(review): a copy failure is recorded in err but not
+		 * acted upon; the hardware AH already exists at this point.
+		 * Confirm whether it should be torn down and the error
+		 * returned.
+		 */
+		err = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	}
+	return &ah->ibah;
+
+error:
+	kfree(ah);
+	irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id);
+
+	return ERR_PTR(err);
+}
+
+/**
+ * irdma_destroy_ah - Destroy address handle
+ * @ibah: pointer to address handle
+ * @flags: destroy flags; RDMA_DESTROY_AH_SLEEPABLE is forwarded to the CQP op
+ *
+ * Return: -EINVAL when the AH is not marked valid, otherwise 0.
+ */
+static int irdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+	struct irdma_device *iwdev = to_iwdev(ibah->device);
+	struct irdma_ah *ah = to_iwah(ibah);
+	int err;
+
+	if (!ah->sc_ah.ah_info.ah_valid)
+		return -EINVAL;
+
+	err = irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY,
+			      flags & RDMA_DESTROY_AH_SLEEPABLE,
+			      irdma_destroy_ah_cb, ah);
+	if (err) {
+		/*
+		 * The CQP op failed, so release everything here.
+		 * NOTE(review): on success the release is presumably done by
+		 * irdma_destroy_ah_cb - confirm.
+		 */
+		irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs,
+				ah->sc_ah.ah_info.ah_idx);
+		irdma_rem_pdusecount(ah->pd, iwdev);
+		kfree(ah);
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_query_ah - Query address handle
+ * @ibah: pointer to address handle
+ * @ah_attr: attributes to fill in; zeroed first
+ *
+ * GRH fields are reported only when the AH was created with IB_AH_GRH.
+ *
+ * Return: always 0.
+ */
+static int irdma_query_ah(struct ib_ah *ibah,
+			  struct rdma_ah_attr *ah_attr)
+{
+	struct irdma_ah *ah = to_iwah(ibah);
+
+	memset(ah_attr, 0, sizeof(*ah_attr));
+	if (ah->av.attrs.ah_flags & IB_AH_GRH) {
+		ah_attr->ah_flags = IB_AH_GRH;
+		ah_attr->grh.flow_label = ah->sc_ah.ah_info.flow_label;
+		ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos;
+		ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl;
+		ah_attr->grh.sgid_index = ah->sgid_index;
+		memcpy(&ah_attr->grh.dgid, &ah->dgid, sizeof(ah_attr->grh.dgid));
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_get_link_layer - report the port link layer
+ * @ibdev: device pointer from stack
+ * @port_num: port number
+ *
+ * Always reports Ethernet, whatever the port.
+ */
+static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+/**
+ * irdma_mac_to_guid - build a 64-bit GUID from a netdev MAC address
+ * @ndev: netdev whose dev_addr is used
+ *
+ * Layout: mac[0] with bit 1 inverted, mac[1], mac[2], 0xff, 0xfe, mac[3],
+ * mac[4], mac[5].
+ */
+static __be64 irdma_mac_to_guid(struct net_device *ndev)
+{
+	unsigned char *mac = ndev->dev_addr;
+	__be64 guid;
+	unsigned char *out = (unsigned char *)&guid;
+
+	/* first three MAC bytes, bit 1 of the leading byte flipped */
+	out[0] = mac[0] ^ 2;
+	memcpy(&out[1], &mac[1], 2);
+
+	/* fixed ff:fe filler in the middle */
+	out[3] = 0xff;
+	out[4] = 0xfe;
+
+	/* last three MAC bytes */
+	memcpy(&out[5], &mac[3], 3);
+
+	return guid;
+}
+
+/**
+ * irdma_get_netdev - return the netdev behind the device
+ * @ibdev: device pointer from stack
+ * @port_num: port number
+ *
+ * Return: the netdev with a reference taken via dev_hold(), or NULL when
+ * the device has none.
+ */
+static struct net_device *irdma_get_netdev(struct ib_device *ibdev,
+					   u8 port_num)
+{
+	struct net_device *netdev = to_iwdev(ibdev)->netdev;
+
+	if (!netdev)
+		return NULL;
+
+	dev_hold(netdev);
+
+	return netdev;
+}
+
+/* Verbs installed only in RoCE mode, see irdma_init_roce_device() */
+static const struct ib_device_ops irdma_roce_dev_ops = {
+ .get_link_layer = irdma_get_link_layer,
+ .query_ah = irdma_query_ah,
+ .attach_mcast = irdma_attach_mcast,
+ .detach_mcast = irdma_detach_mcast,
+ .query_gid = irdma_query_gid_roce,
+ .modify_qp = irdma_modify_qp_roce,
+};
+
+/* Verbs installed only in iWARP mode, see irdma_init_iw_device() */
+static const struct ib_device_ops irdma_iw_dev_ops = {
+ .query_gid = irdma_query_gid,
+ .modify_qp = irdma_modify_qp,
+};
+
+/* Verbs common to both modes, installed by irdma_init_rdma_device() */
+static const struct ib_device_ops irdma_dev_ops = {
+ .get_port_immutable = irdma_port_immutable,
+ .get_netdev = irdma_get_netdev,
+ .query_port = irdma_query_port,
+ .modify_port = irdma_modify_port,
+ .query_pkey = irdma_query_pkey,
+ .alloc_ucontext = irdma_alloc_ucontext,
+ .dealloc_ucontext = irdma_dealloc_ucontext,
+ .mmap = irdma_mmap,
+ .alloc_pd = irdma_alloc_pd,
+ .dealloc_pd = irdma_dealloc_pd,
+ .create_qp = irdma_create_qp,
+ .query_qp = irdma_query_qp,
+ .destroy_qp = irdma_destroy_qp,
+ .create_cq = irdma_create_cq,
+ .destroy_cq = irdma_destroy_cq,
+ .get_dma_mr = irdma_get_dma_mr,
+ .reg_user_mr = irdma_reg_user_mr,
+ .dereg_mr = irdma_dereg_mr,
+ .alloc_mw = irdma_alloc_mw,
+ .dealloc_mw = irdma_dealloc_mw,
+ .alloc_hw_stats = irdma_alloc_hw_stats,
+ .get_hw_stats = irdma_get_hw_stats,
+ .query_device = irdma_query_device,
+ .create_ah = irdma_create_ah,
+ .destroy_ah = irdma_destroy_ah,
+ .drain_sq = irdma_drain_sq,
+ .drain_rq = irdma_drain_rq,
+ .alloc_mr = irdma_alloc_mr,
+ .map_mr_sg = irdma_map_mr_sg,
+ .get_dev_fw_str = irdma_get_dev_fw_str,
+ .poll_cq = irdma_poll_cq,
+ .req_notify_cq = irdma_req_notify_cq,
+ .post_send = irdma_post_send,
+ .post_recv = irdma_post_recv,
+ .disassociate_ucontext = irdma_disassociate_ucontext,
+ INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
+};
+
+/**
+ * irdma_init_roce_device - initialization of roce rdma device
+ * @iwibdev: irdma ib device
+ *
+ * Enables the multicast attach/detach user verbs, marks the node as an IB
+ * CA with a GUID derived from the netdev MAC, sets uverbs_abi_ver to 1 and
+ * installs the RoCE-specific ops.
+ */
+static void irdma_init_roce_device(struct irdma_ib_device *iwibdev)
+{
+	struct ib_device *ibdev = &iwibdev->ibdev;
+
+	ibdev->uverbs_cmd_mask |=
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+	ibdev->node_type = RDMA_NODE_IB_CA;
+	ibdev->node_guid = irdma_mac_to_guid(iwibdev->iwdev->netdev);
+	ibdev->uverbs_abi_ver = 1;
+	ib_set_device_ops(ibdev, &irdma_roce_dev_ops);
+}
+
+/**
+ * irdma_init_iw_device - initialization of iwarp rdma device
+ * @iwibdev: irdma ib device
+ *
+ * Marks the node as an RNIC, copies the netdev MAC into the node GUID,
+ * allocates and fills the iWARP CM callback table and installs the
+ * iWARP-specific ops.
+ *
+ * Return: 0 on success, -ENOMEM when the CM table cannot be allocated.
+ */
+static int irdma_init_iw_device(struct irdma_ib_device *iwibdev)
+{
+	struct ib_device *ibdev = &iwibdev->ibdev;
+	struct net_device *netdev = iwibdev->iwdev->netdev;
+
+	ibdev->node_type = RDMA_NODE_RNIC;
+	ether_addr_copy((u8 *)&ibdev->node_guid, netdev->dev_addr);
+
+	ibdev->iwcm = kzalloc(sizeof(*ibdev->iwcm), GFP_KERNEL);
+	if (!ibdev->iwcm)
+		return -ENOMEM;
+
+	/* connection-manager entry points */
+	ibdev->iwcm->add_ref = irdma_add_ref;
+	ibdev->iwcm->rem_ref = irdma_rem_ref;
+	ibdev->iwcm->get_qp = irdma_get_qp;
+	ibdev->iwcm->connect = irdma_connect;
+	ibdev->iwcm->accept = irdma_accept;
+	ibdev->iwcm->reject = irdma_reject;
+	ibdev->iwcm->create_listen = irdma_create_listen;
+	ibdev->iwcm->destroy_listen = irdma_destroy_listen;
+	memcpy(ibdev->iwcm->ifname, netdev->name,
+	       sizeof(ibdev->iwcm->ifname));
+	ib_set_device_ops(ibdev, &irdma_iw_dev_ops);
+
+	return 0;
+}
+
+/**
+ * irdma_init_rdma_device - initialization of rdma device
+ * @iwdev: irdma device
+ *
+ * Allocates the ib device, links it with @iwdev, sets the supported user
+ * verbs mask, runs the RoCE or iWARP specific setup and installs the common
+ * ops.
+ *
+ * Return: 0 on success or a negative errno; on failure iwdev->iwibdev is
+ * left NULL.
+ */
+static int irdma_init_rdma_device(struct irdma_device *iwdev)
+{
+	struct irdma_ib_device *iwibdev;
+	struct pci_dev *pcidev = (struct pci_dev *)iwdev->rf->hw.dev_context;
+	int ret;
+
+	iwibdev = ib_alloc_device(irdma_ib_device, ibdev);
+	if (!iwibdev)
+		return -ENOMEM;
+
+	iwibdev->ibdev.owner = THIS_MODULE;
+	iwdev->iwibdev = iwibdev;
+	iwibdev->iwdev = iwdev;
+
+	iwibdev->ibdev.uverbs_abi_ver = IRDMA_ABI_VER;
+	iwibdev->ibdev.uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_REG_MR) |
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
+		(1ull << IB_USER_VERBS_CMD_BIND_MW) |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_MW) |
+		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
+		(1ull << IB_USER_VERBS_CMD_POST_SEND);
+
+	if (iwdev->roce_mode) {
+		irdma_init_roce_device(iwibdev);
+	} else {
+		ret = irdma_init_iw_device(iwibdev);
+		if (ret) {
+			ib_dealloc_device(&iwibdev->ibdev);
+			/* do not leave a pointer to the freed device behind */
+			iwdev->iwibdev = NULL;
+			return ret;
+		}
+	}
+	iwibdev->ibdev.phys_port_cnt = 1;
+	iwibdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count;
+	iwibdev->ibdev.dev.parent = &pcidev->dev;
+	ib_set_device_ops(&iwibdev->ibdev, &irdma_dev_ops);
+
+	return 0;
+}
+
+/**
+ * irdma_port_ibevent - indicate port event
+ * @iwdev: iwarp device
+ *
+ * Dispatches IB_EVENT_PORT_ACTIVE for port 1 when iwdev->iw_status is
+ * non-zero and IB_EVENT_PORT_ERR otherwise.
+ */
+void irdma_port_ibevent(struct irdma_device *iwdev)
+{
+	struct ib_event event;
+
+	event.device = &iwdev->iwibdev->ibdev;
+	event.element.port_num = 1;
+	if (iwdev->iw_status)
+		event.event = IB_EVENT_PORT_ACTIVE;
+	else
+		event.event = IB_EVENT_PORT_ERR;
+
+	ib_dispatch_event(&event);
+}
+
+/**
+ * irdma_destroy_rdma_device - destroy rdma device and free resources
+ * @iwibdev: IB device ptr; NULL is ignored
+ *
+ * Unregisters the device, frees the iWARP CM table, waits (bounded by
+ * IRDMA_EVENT_TIMEOUT) for iwdev->use_count to drop to zero and then
+ * releases the ib device.
+ */
+void irdma_destroy_rdma_device(struct irdma_ib_device *iwibdev)
+{
+	struct irdma_device *iwdev;
+	struct ib_device *ibdev;
+
+	if (!iwibdev)
+		return;
+
+	iwdev = iwibdev->iwdev;
+	ibdev = &iwibdev->ibdev;
+
+	ib_unregister_device(ibdev);
+	kfree(ibdev->iwcm);
+	ibdev->iwcm = NULL;
+	wait_event_timeout(iwdev->close_wq,
+			   !atomic64_read(&iwdev->use_count),
+			   IRDMA_EVENT_TIMEOUT);
+
+	ib_dealloc_device(ibdev);
+}
+
+/**
+ * irdma_register_rdma_device - register iwarp device to IB
+ * @iwdev: iwarp device
+ *
+ * Initializes the ib device, selects the driver id (the i40iw id on GEN_1
+ * hardware for backward compatibility) and registers it with the RDMA core.
+ *
+ * Return: 0 on success or a negative errno; on failure the ib device is
+ * released and iwdev->iwibdev is reset to NULL.
+ */
+int irdma_register_rdma_device(struct irdma_device *iwdev)
+{
+	int ret;
+	struct irdma_ib_device *iwibdev;
+
+	ret = irdma_init_rdma_device(iwdev);
+	if (ret)
+		return ret;
+
+	iwibdev = iwdev->iwibdev;
+	rdma_set_device_sysfs_group(&iwibdev->ibdev, &irdma_attr_group);
+	if (iwdev->rf->sc_dev.hw_attrs.hw_rev == IRDMA_GEN_1)
+		/* backward compat with old user-space libi40iw */
+		iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
+	else
+		iwibdev->ibdev.driver_id = RDMA_DRIVER_IRDMA;
+
+	ret = ib_register_device(&iwibdev->ibdev, "irdma%d");
+	if (!ret)
+		return 0;
+
+	/* registration failed: undo irdma_init_rdma_device() */
+	kfree(iwibdev->ibdev.iwcm);
+	iwibdev->ibdev.iwcm = NULL;
+	ib_dealloc_device(&iwibdev->ibdev);
+	/* keep irdma_destroy_rdma_device()'s NULL check meaningful */
+	iwdev->iwibdev = NULL;
+	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+		    "Register RDMA device fail\n");
+
+	return ret;
+}
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
new file mode 100644
index 0000000..63c78af
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_VERBS_H
+#define IRDMA_VERBS_H
+
+#define IRDMA_MAX_SAVED_PHY_PGADDR 4
+
+struct irdma_ucontext {
+ struct ib_ucontext ibucontext;
+ struct irdma_device *iwdev;
+ struct list_head cq_reg_mem_list;
+ spinlock_t cq_reg_mem_list_lock; /* protect CQ memory list */
+ struct list_head qp_reg_mem_list;
+ spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */
+ int abi_ver;
+};
+
+struct irdma_pd {
+ struct ib_pd ibpd;
+ struct irdma_sc_pd sc_pd;
+ atomic_t usecount;
+};
+
+struct irdma_av {
+ u8 macaddr[16];
+ struct rdma_ah_attr attrs; /* copy of the attributes given to create_ah */
+ union {
+ struct sockaddr saddr;
+ struct sockaddr_in saddr_in;
+ struct sockaddr_in6 saddr_in6;
+ } sgid_addr, dgid_addr; /* source/destination GIDs converted by rdma_gid2ip() */
+ u8 net_type; /* rdma_gid_attr_network_type() of the source GID */
+};
+
+struct irdma_ah {
+ struct ib_ah ibah; /* handle returned to the stack by create_ah */
+ struct irdma_sc_ah sc_ah; /* AH state passed to the CQP create/destroy ops */
+ atomic_t usecount;
+ struct irdma_pd *pd; /* PD the AH was created on */
+ struct irdma_av av;
+ u8 sgid_index; /* grh.sgid_index saved at create, reported by query_ah */
+ union ib_gid dgid; /* grh.dgid saved at create, reported by query_ah */
+};
+
+struct irdma_hmc_pble {
+ union {
+ u32 idx;
+ dma_addr_t addr;
+ };
+};
+
+struct irdma_cq_mr {
+ struct irdma_hmc_pble cq_pbl;
+ dma_addr_t shadow;
+};
+
+struct irdma_qp_mr {
+ struct irdma_hmc_pble sq_pbl;
+ struct irdma_hmc_pble rq_pbl;
+ dma_addr_t shadow;
+ struct page *sq_page;
+};
+
+struct irdma_pbl {
+ struct list_head list;
+ union {
+ struct irdma_qp_mr qp_mr;
+ struct irdma_cq_mr cq_mr;
+ };
+
+ bool pbl_allocated;
+ bool on_list;
+ u64 user_base;
+ struct irdma_pble_alloc pble_alloc;
+ struct irdma_mr *iwmr;
+};
+
+struct irdma_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ struct ib_fmr ibfmr;
+ };
+ struct ib_umem *region;
+ u16 type;
+ u32 page_cnt;
+ u32 page_size;
+ u64 page_msk;
+ u32 npages;
+ u32 stag;
+ u64 len;
+ u64 pgaddrmem[IRDMA_MAX_SAVED_PHY_PGADDR];
+ struct irdma_pbl iwpbl;
+};
+
+struct irdma_cq {
+ struct ib_cq ibcq;
+ struct irdma_sc_cq sc_cq;
+ u16 cq_head;
+ u16 cq_size;
+ u16 cq_num;
+ bool user_mode;
+ u32 polled_cmpls;
+ u32 cq_mem_size;
+ struct irdma_dma_mem kmem;
+ spinlock_t lock; /* for poll cq */
+ struct irdma_pbl *iwpbl;
+};
+
+struct disconn_work {
+ struct work_struct work;
+ struct irdma_qp *iwqp;
+};
+
+struct iw_cm_id;
+
+struct irdma_qp_kmode {
+ struct irdma_dma_mem dma_mem;
+ u64 *wrid_mem;
+};
+
+struct irdma_qp {
+ struct ib_qp ibqp;
+ struct irdma_sc_qp sc_qp;
+ struct irdma_device *iwdev;
+ struct irdma_cq *iwscq;
+ struct irdma_cq *iwrcq;
+ struct irdma_pd *iwpd;
+ struct irdma_qp_host_ctx_info ctx_info;
+ union {
+ struct irdma_iwarp_offload_info iwarp_info;
+ struct irdma_roce_offload_info roce_info;
+ };
+
+ struct irdma_udp_offload_info udp_info;
+ struct irdma_ah roce_ah;
+ struct list_head teardown_entry;
+ void *allocated_buf;
+ atomic_t refcount;
+ struct iw_cm_id *cm_id;
+ void *cm_node;
+ struct ib_mr *lsmm_mr;
+ struct work_struct work;
+ atomic_t hw_mod_qp_pend;
+ enum ib_qp_state ibqp_state;
+ u32 iwarp_state;
+ u32 qp_mem_size;
+ u32 last_aeq;
+ atomic_t close_timer_started;
+ spinlock_t lock; /* serialize posting WRs to SQ/RQ */
+ struct irdma_qp_context *iwqp_context;
+ void *pbl_vbase;
+ dma_addr_t pbl_pbase;
+ struct page *page;
+ u8 active_conn:1;
+ u8 user_mode:1;
+ u8 hte_added:1;
+ u8 flush_issued:1;
+ u8 destroyed:1;
+ u8 sig_all:1;
+ u8 pau_mode:1;
+ u8 rsvd:1;
+ u16 term_sq_flush_code;
+ u16 term_rq_flush_code;
+ u8 hw_iwarp_state;
+ u8 hw_tcp_state;
+ struct irdma_qp_kmode kqp;
+ struct irdma_dma_mem host_ctx;
+ struct timer_list terminate_timer;
+ struct irdma_pbl *iwpbl;
+ struct irdma_dma_mem q2_ctx_mem;
+ struct irdma_dma_mem ietf_mem;
+ struct completion sq_drained;
+ struct completion rq_drained;
+ atomic_t mcast_ref_cnt;
+ wait_queue_head_t waitq;
+ wait_queue_head_t mod_qp_waitq;
+ u8 rts_ae_rcvd;
+};
+
+void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4);
+#endif /* IRDMA_VERBS_H */
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 06c34d9..02bc8db 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -102,6 +102,7 @@ enum rdma_driver_id {
RDMA_DRIVER_RXE,
RDMA_DRIVER_HFI1,
RDMA_DRIVER_QIB,
+ RDMA_DRIVER_IRDMA,
};
#endif
--
1.8.3.1
^ permalink raw reply related
* [RFC v1 19/19] RDMA/irdma: Update MAINTAINERS file
From: Shiraz Saleem @ 2019-02-15 17:11 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
Add maintainer entry for irdma driver.
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
MAINTAINERS | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 8c68de3c..1b4fbc4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7807,7 +7807,15 @@ L: linux-pm@vger.kernel.org
S: Supported
F: drivers/cpufreq/intel_pstate.c
-INTEL RDMA RNIC DRIVER
+INTEL ETHERNET RDMA DRIVER
+M: Mustafa Ismail <mustafa.ismail@intel.com>
+M: Shiraz Saleem <shiraz.saleem@intel.com>
+L: linux-rdma@vger.kernel.org
+S: Supported
+F: drivers/infiniband/hw/irdma/
+F: include/uapi/rdma/irdma-abi.h
+
+INTEL X722 RDMA RNIC DRIVER
M: Faisal Latif <faisal.latif@intel.com>
M: Shiraz Saleem <shiraz.saleem@intel.com>
L: linux-rdma@vger.kernel.org
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH net-next 10/13] net: mvpp2: reset the XPCS while reconfiguring the serdes lanes
From: Russell King - ARM Linux admin @ 2019-02-15 17:12 UTC (permalink / raw)
To: Antoine Tenart
Cc: davem, netdev, linux-kernel, thomas.petazzoni, maxime.chevallier,
gregory.clement, miquel.raynal, nadavh, stefanc, ymarkman, mw
In-Reply-To: <20190215153241.6857-11-antoine.tenart@bootlin.com>
On Fri, Feb 15, 2019 at 04:32:38PM +0100, Antoine Tenart wrote:
> The documentation advises to set the XPCS in reset while reconfiguring
> the serdes lanes. This seems to be a good thing to do, but the PPv2
> driver wasn't doing it. This patch fixes it.
Hmm. That statement seems to have some ambiguity in it - we do two
"reconfigurations" - one may be upon initialisation, where the lane
is already configured for 10Gbase-KR, and we're re-initialising it
for the same mode. The other case is when we're switching between
10Gbase-KR and SGMII, or as will be the case with 2.5G support for
the Alaska PHYs, 2500base-X.
Does this apply to reconfiguration of the serdes lane between
10Gbase-KR and slower modes?
>
> Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
> ---
> drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 1 +
> drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 +++++++-
> 2 files changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
> index 21ddcac1ceea..7380bddc53b8 100644
> --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
> +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
> @@ -481,6 +481,7 @@
> /* XPCS registers. PPv2.2 only */
> #define MVPP22_XPCS_BASE(port) (0x7400 + (port) * 0x1000)
> #define MVPP22_XPCS_CFG0 0x0
> +#define MVPP22_XPCS_CFG0_RESET_DIS BIT(0)
> #define MVPP22_XPCS_CFG0_PCS_MODE(n) ((n) << 3)
> #define MVPP22_XPCS_CFG0_ACTIVE_LANE(n) ((n) << 5)
>
> diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> index 4a18f8e54c90..5d05306e79a8 100644
> --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
> @@ -1016,13 +1016,19 @@ static void mvpp22_gop_init_10gkr(struct mvpp2_port *port)
> void __iomem *xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
> u32 val;
>
> - /* XPCS */
> + /* XPCS : Reset the XPCS when reconfiguring the lanes */
> + val = readl(xpcs + MVPP22_XPCS_CFG0);
> + writel(val & ~MVPP22_XPCS_CFG0_RESET_DIS, xpcs + MVPP22_XPCS_CFG0);
> +
> val = readl(xpcs + MVPP22_XPCS_CFG0);
> val &= ~(MVPP22_XPCS_CFG0_PCS_MODE(0x3) |
> MVPP22_XPCS_CFG0_ACTIVE_LANE(0x3));
> val |= MVPP22_XPCS_CFG0_ACTIVE_LANE(2);
> writel(val, xpcs + MVPP22_XPCS_CFG0);
>
> + val = readl(xpcs + MVPP22_XPCS_CFG0);
> + writel(val | MVPP22_XPCS_CFG0_RESET_DIS, xpcs + MVPP22_XPCS_CFG0);
> +
> /* MPCS */
> val = readl(mpcs + MVPP22_MPCS_CTRL);
> val &= ~MVPP22_MPCS_CTRL_FWD_ERR_CONN;
> --
> 2.20.1
>
>
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
^ permalink raw reply
* [RFC v1 18/19] RDMA/irdma: Add Kconfig and Makefile
From: Shiraz Saleem @ 2019-02-15 17:11 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
Add Kconfig and Makefile to build irdma driver
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/Kconfig | 1 +
drivers/infiniband/hw/Makefile | 1 +
drivers/infiniband/hw/irdma/Kconfig | 11 +++++++++++
drivers/infiniband/hw/irdma/Makefile | 31 +++++++++++++++++++++++++++++++
4 files changed, 44 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/Kconfig
create mode 100644 drivers/infiniband/hw/irdma/Makefile
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index a1fb840d..a841a07 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -95,6 +95,7 @@ source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/i40iw/Kconfig"
+source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index e4f31c1..bc79ef3 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_QIB) += qib/
obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/
+obj-$(CONFIG_INFINIBAND_IRDMA) += irdma/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_NES) += nes/
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
new file mode 100644
index 0000000..652f5f9
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -0,0 +1,11 @@
+config INFINIBAND_IRDMA
+ tristate "Intel(R) Ethernet Connection RDMA Driver"
+ depends on INET && (I40E || ICE)
+ depends on IPV6 || !IPV6
+ depends on PCI
+ select GENERIC_ALLOCATOR
+ ---help---
+ This is an Ethernet RDMA driver that supports E810 (iWARP/RoCE)
+ and X722 (iWARP) network devices.
+ To compile this driver as a module, choose M here. The module
+ will be called irdma.
diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile
new file mode 100644
index 0000000..cfc8126
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+# Copyright (c) 2019, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection RDMA Linux Driver
+#
+
+ccflags-y := -I $(srctree)/drivers/net/ethernet/intel/i40e
+ccflags-y += -I $(srctree)/drivers/net/ethernet/intel/ice
+
+obj-$(CONFIG_INFINIBAND_IRDMA) += irdma.o
+
+# Objects linked into irdma.o; the last entry carries no line continuation
+irdma-objs := main.o \
+ i40iw_if.o \
+ i40iw_hw.o \
+ irdma_if.o \
+ hw.o \
+ icrdma_hw.o \
+ cm.o \
+ ctrl.o \
+ hmc.o \
+ pble.o \
+ puda.o \
+ uk.o \
+ utils.o \
+ verbs.o \
+ uda.o \
+ ws.o \
+ trace.o
+
+CFLAGS_trace.o = -I$(src)
--
1.8.3.1
^ permalink raw reply related
* [RFC v1 09/19] RDMA/irdma: Add QoS definitions
From: Shiraz Saleem @ 2019-02-15 17:10 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: Mustafa Ismail <mustafa.ismail@intel.com>
Add definitions for managing the RDMA HW work scheduler (WS) tree.
A WS node is created via a control QP operation with the bandwidth
allocation, arbitration scheme, and traffic class of the QP specified.
The Qset handle returned associates the QoS parameters for the QP.
The Qset is registered with the LAN and an equivalent node is created
in the LAN packet scheduler tree.
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/hw/irdma/ws.c | 449 +++++++++++++++++++++++++++++++++++++++
drivers/infiniband/hw/irdma/ws.h | 40 ++++
2 files changed, 489 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/ws.c
create mode 100644 drivers/infiniband/hw/irdma/ws.h
diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c
new file mode 100644
index 0000000..f9c0beb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ws.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "osdep.h"
+#include "status.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+
+#include "ws.h"
+
+/**
+ * irdma_alloc_node - Allocate a WS node and init
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ * @node_type: Type of node, leaf or parent
+ * @parent: parent node pointer
+ *
+ * A WS node id is requested from irdma_alloc_ws_node_id() only when @parent
+ * is set or vsi->vm_vf_type is IRDMA_VF_TYPE; otherwise the node keeps index
+ * 0. Leaf nodes take traffic class, relative bandwidth and LAN qos handle
+ * from vsi->qos[@user_pri]; parent nodes get a relative bandwidth of 1.
+ * NOTE(review): fields not assigned here (e.g. enable, qs_handle, and
+ * type_leaf/traffic_class for parent nodes) keep whatever contents
+ * irdma_allocate_virt_mem() returned - presumably zeroed, confirm.
+ *
+ * Return: the initialized node, or NULL if the node memory or a WS node id
+ * could not be obtained
+ */
+static struct irdma_ws_node *irdma_alloc_node(struct irdma_sc_vsi *vsi,
+ u8 user_pri,
+ enum irdma_ws_node_type node_type,
+ struct irdma_ws_node *parent)
+{
+ struct irdma_virt_mem ws_mem;
+ struct irdma_ws_node *node;
+ u16 node_index = 0; /* stays 0 when no WS node id is allocated */
+
+ if (irdma_allocate_virt_mem(vsi->dev->hw, &ws_mem,
+ sizeof(struct irdma_ws_node)))
+ return NULL;
+
+ if (parent || vsi->vm_vf_type == IRDMA_VF_TYPE) {
+ node_index = irdma_alloc_ws_node_id(vsi->dev);
+ if (node_index == IRDMA_WS_NODE_INVALID) {
+ irdma_free_virt_mem(vsi->dev->hw, &ws_mem);
+ return NULL;
+ }
+ }
+
+ node = (struct irdma_ws_node *)ws_mem.va;
+ node->index = node_index;
+ node->vsi_index = vsi->vsi_idx;
+ INIT_LIST_HEAD(&node->siblings);
+ node->first_child = NULL;
+ if (node_type == WS_NODE_TYPE_LEAF) {
+ node->type_leaf = true;
+ node->traffic_class = vsi->qos[user_pri].traffic_class;
+ node->user_pri = user_pri;
+ node->rel_bw =
+ vsi->qos[user_pri].rel_bw;
+ node->lan_qs_handle =
+ vsi->qos[user_pri].lan_qos_handle;
+ node->prio_type = IRDMA_PRIO_WEIGHTED_RR;
+ } else {
+ node->rel_bw = 1;
+ node->prio_type = IRDMA_PRIO_WEIGHTED_RR;
+ }
+
+ node->parent = parent;
+
+ return node;
+}
+
+/**
+ * irdma_free_node - Release a WS node together with its node id
+ * @vsi: vsi pointer, gives access to the device that owns the node
+ * @node: Pointer to node to free
+ *
+ * An index of 0 is treated as "no WS node id was allocated", so in that
+ * case only the node memory is given back.
+ */
+static void irdma_free_node(struct irdma_sc_vsi *vsi,
+			    struct irdma_ws_node *node)
+{
+	struct irdma_virt_mem mem;
+
+	/* describe the node memory up front; node->index is read before it goes */
+	mem.size = sizeof(*node);
+	mem.va = node;
+
+	if (node->index)
+		irdma_free_ws_node_id(vsi->dev, node->index);
+
+	irdma_free_virt_mem(vsi->dev->hw, &mem);
+}
+
+/**
+ * irdma_ws_cqp_cmd - Post CQP work scheduler node cmd
+ * @vsi: vsi pointer
+ * @node: pointer to node
+ * @cmd: add, remove or modify
+ *
+ * Builds an irdma_ws_node_info from @node and issues it through
+ * irdma_cqp_ws_node_cmd(). A node without a parent is sent with its own id
+ * as parent_id. After a successful add of a leaf node the qs_handle found
+ * in the node info is stored in the node and in vsi->qos[] for the node's
+ * user priority.
+ *
+ * Return: 0 on success, IRDMA_ERR_NO_MEMORY on any failure of the CQP
+ * command (the underlying error code is not propagated)
+ */
+static enum irdma_status_code
+irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *node,
+ u8 cmd)
+{
+ struct irdma_ws_node_info node_info = {};
+
+ node_info.id = node->index;
+ node_info.vsi = node->vsi_index;
+ if (node->parent)
+ node_info.parent_id = node->parent->index;
+ else
+ node_info.parent_id = node_info.id;
+
+ node_info.weight = node->rel_bw;
+ node_info.tc = node->traffic_class;
+ node_info.prio_type = node->prio_type;
+ node_info.type_leaf = node->type_leaf;
+ node_info.enable = node->enable;
+ if (irdma_cqp_ws_node_cmd(vsi->dev, cmd, &node_info)) {
+ irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "CQP WS CMD failed\n");
+ return IRDMA_ERR_NO_MEMORY;
+ }
+
+ if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) {
+ node->qs_handle = node_info.qs_handle;
+ vsi->qos[node->user_pri].qs_handle = node_info.qs_handle;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_node_from_entry - Given entry, get pointer to ws node struct
+ * @entry: Points to sibling list of a node
+ *
+ * Return: the node that embeds @entry as its siblings member, or NULL when
+ * @entry is NULL
+ */
+static struct irdma_ws_node *irdma_node_from_entry(struct list_head *entry)
+{
+	if (!entry)
+		return NULL;
+
+	/* container_of() replaces the open-coded offsetof() pointer math */
+	return container_of(entry, struct irdma_ws_node, siblings);
+}
+
+/**
+ * ws_find_node - Find SC WS node based on VSI id or TC
+ * @root: pointer to first node in level corresponding to VSI/TC
+ * @match_val: VSI index or traffic class to look for, depending on @type
+ * @type: selects whether vsi_index or traffic_class is compared
+ *
+ * Walks the sibling list starting at @root and gives up once the walk is
+ * back at @root.
+ *
+ * Return: matching node, or NULL if @root is NULL or no sibling matches. For
+ * a @type other than the two handled here @root itself is returned.
+ */
+static struct irdma_ws_node *ws_find_node(struct irdma_ws_node *root,
+ u16 match_val,
+ enum irdma_ws_match_type type)
+{
+ struct irdma_ws_node *node = root;
+ struct list_head *entry;
+
+ if (!root)
+ return NULL;
+
+ switch (type) {
+ case WS_MATCH_TYPE_VSI:
+ while (node && node->vsi_index != match_val) {
+ entry = node->siblings.next;
+ node = irdma_node_from_entry(entry);
+ if (node == root)
+ return NULL;
+ }
+ break;
+ case WS_MATCH_TYPE_TC:
+ while (node && node->traffic_class != match_val) {
+ entry = node->siblings.next;
+ node = irdma_node_from_entry(entry);
+ if (node == root)
+ return NULL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return node;
+}
+
+/**
+ * ws_tree_del_node - Unlink a node from the SW WS tree
+ * @node: shared code node pointer
+ *
+ * Only the software tree is touched. A node without a parent is left as is.
+ * When @node is the parent's first_child, first_child is moved on to the
+ * next sibling, or cleared if @node has no siblings.
+ */
+static void ws_tree_del_node(struct irdma_ws_node *node)
+{
+	struct irdma_ws_node *parent = node->parent;
+
+	if (!parent)
+		return;
+
+	if (parent->first_child != node) {
+		list_del(&node->siblings);
+		return;
+	}
+
+	if (list_empty(&node->siblings)) {
+		parent->first_child = NULL;
+		return;
+	}
+
+	/* pick the successor before list_del() invalidates the links */
+	parent->first_child = irdma_node_from_entry(node->siblings.next);
+	list_del(&node->siblings);
+}
+
+/**
+ * irdma_tc_in_use - Checks to see if a leaf node is in use
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ *
+ * NOTE(review): only vsi->qos[@user_pri].lock is held while the qplist of
+ * every other user priority is inspected below - confirm that this is
+ * sufficient.
+ *
+ * Return: true if @user_pri, or any user priority mapped to the same traffic
+ * class, still has entries on its qplist; false otherwise
+ */
+static bool irdma_tc_in_use(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&vsi->qos[user_pri].lock, flags);
+ if (!list_empty(&vsi->qos[user_pri].qplist)) {
+ spin_unlock_irqrestore(&vsi->qos[user_pri].lock, flags);
+ return true;
+ }
+
+ /* Check if the traffic class associated with the given user priority
+ * is in use by any other user priority. If so, nothing left to do
+ */
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->qos[i].traffic_class == vsi->qos[user_pri].traffic_class &&
+ !list_empty(&vsi->qos[i].qplist)) {
+ spin_unlock_irqrestore(&vsi->qos[user_pri].lock, flags);
+ return true;
+ }
+ }
+ spin_unlock_irqrestore(&vsi->qos[user_pri].lock, flags);
+
+ return false;
+}
+
+/**
+ * irdma_remove_leaf - Remove leaf node unconditionally
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ *
+ * Looks up the VSI node and the leaf for the traffic class of @user_pri,
+ * deletes the leaf, and then deletes the VSI node and the tree root as well
+ * once they are left without children. Returns silently when the root, the
+ * VSI node or the leaf is not found. Results of the CQP delete commands are
+ * not checked.
+ */
+static void irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ struct irdma_ws_node *ws_tree_root, *vsi_node, *tc_node;
+
+ ws_tree_root = vsi->dev->ws_tree_root;
+ if (!ws_tree_root)
+ return;
+
+ vsi_node = ws_find_node(ws_tree_root->first_child, vsi->vsi_idx,
+ WS_MATCH_TYPE_VSI);
+ if (!vsi_node)
+ return;
+
+ tc_node = ws_find_node(vsi_node->first_child,
+ vsi->qos[user_pri].traffic_class,
+ WS_MATCH_TYPE_TC);
+ if (!tc_node)
+ return;
+
+ irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE);
+ irdma_lan_unregister_qset(vsi, tc_node);
+ ws_tree_del_node(tc_node);
+ irdma_free_node(vsi, tc_node);
+ /* Check if VSI node can be freed */
+ if (!vsi_node->first_child) {
+ irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE);
+ ws_tree_del_node(vsi_node);
+ irdma_free_node(vsi, vsi_node);
+ /* Free head node if there are no remaining VSI nodes */
+ if (!ws_tree_root->first_child) {
+ irdma_ws_cqp_cmd(vsi, ws_tree_root,
+ IRDMA_OP_WS_DELETE_NODE);
+ irdma_free_node(vsi, ws_tree_root);
+ vsi->dev->ws_tree_root = NULL;
+ }
+ }
+}
+
+/**
+ * irdma_ws_add - Build work scheduler tree, set RDMA qs_handle
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ *
+ * Creates, as needed, the tree root, the node for this VSI and the leaf for
+ * the traffic class of @user_pri, registers a new leaf with the LAN driver
+ * and enables it. The leaf's handles are then copied to every user priority
+ * of the VSI that maps to the same traffic class. The tree is modified with
+ * vsi->dev->ws_mutex held.
+ *
+ * Return: 0 on success, IRDMA_ERR_NOT_READY while vsi->tc_change_pending is
+ * set, IRDMA_ERR_NO_MEMORY if a node could not be created, or the error from
+ * registering/enabling a new leaf
+ */
+enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ struct irdma_ws_node *ws_tree_root;
+ struct irdma_ws_node *vsi_node;
+ struct irdma_ws_node *tc_node;
+ u16 traffic_class;
+ enum irdma_status_code ret = 0;
+ int i;
+
+ mutex_lock(&vsi->dev->ws_mutex);
+ if (vsi->tc_change_pending) {
+ mutex_unlock(&vsi->dev->ws_mutex);
+ return IRDMA_ERR_NOT_READY;
+ }
+
+ ws_tree_root = vsi->dev->ws_tree_root;
+ if (!ws_tree_root) {
+ irdma_debug(vsi->dev, IRDMA_DEBUG_WS, "Creating root node\n");
+ ws_tree_root = irdma_alloc_node(vsi, user_pri,
+ WS_NODE_TYPE_PARENT, NULL);
+ if (!ws_tree_root) {
+ ret = IRDMA_ERR_NO_MEMORY;
+ goto exit;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, ws_tree_root);
+ goto exit;
+ }
+
+ vsi->dev->ws_tree_root = ws_tree_root;
+ }
+
+ /* Find a second tier node that matches the VSI */
+ vsi_node = ws_find_node(ws_tree_root->first_child, vsi->vsi_idx,
+ WS_MATCH_TYPE_VSI);
+
+ /* If VSI node doesn't exist, add one */
+ if (!vsi_node) {
+ irdma_debug(vsi->dev, IRDMA_DEBUG_WS,
+ "Node not found matching VSI %d\n", vsi->vsi_idx);
+ vsi_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT,
+ ws_tree_root);
+ if (!vsi_node) {
+ ret = IRDMA_ERR_NO_MEMORY;
+ goto vsi_add_err;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, vsi_node);
+ goto vsi_add_err;
+ }
+
+ if (ws_tree_root->first_child)
+ list_add(&vsi_node->siblings,
+ &ws_tree_root->first_child->siblings);
+ else /* Connect to head node if this is the first VSI node */
+ ws_tree_root->first_child = vsi_node;
+ }
+
+ irdma_debug(vsi->dev, IRDMA_DEBUG_WS,
+ "Using node %d which represents VSI %d\n",
+ vsi_node->index, vsi->vsi_idx);
+ traffic_class = vsi->qos[user_pri].traffic_class;
+ tc_node = ws_find_node(vsi_node->first_child, traffic_class,
+ WS_MATCH_TYPE_TC);
+ if (!tc_node) {
+ /* Add leaf node */
+ irdma_debug(vsi->dev, IRDMA_DEBUG_WS,
+ "Node not found matching VSI %d and TC %d\n",
+ vsi->vsi_idx, traffic_class);
+ tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF,
+ vsi_node);
+ if (!tc_node) {
+ ret = IRDMA_ERR_NO_MEMORY;
+ goto leaf_add_err;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, tc_node);
+ goto leaf_add_err;
+ }
+
+ if (vsi_node->first_child)
+ list_add(&tc_node->siblings,
+ &vsi_node->first_child->siblings);
+ else
+ vsi_node->first_child = tc_node;
+
+ /*
+ * callback to LAN to update the LAN tree with our node
+ */
+ ret = irdma_lan_register_qset(vsi, tc_node);
+ if (ret)
+ goto reg_err;
+
+ tc_node->enable = true;
+ ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE);
+ if (ret)
+ goto reg_err;
+ }
+ irdma_debug(vsi->dev, IRDMA_DEBUG_WS,
+ "Using node %d which represents VSI %d TC %d\n",
+ tc_node->index, vsi->vsi_idx, traffic_class);
+ /*
+ * Iterate through other UPs and update the QS handle if they have
+ * a matching traffic class.
+ */
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->qos[i].traffic_class == traffic_class) {
+ vsi->qos[i].qs_handle = tc_node->qs_handle;
+ vsi->qos[i].lan_qos_handle = tc_node->lan_qs_handle;
+ vsi->qos[i].l2_sched_node_id = tc_node->l2_sched_node_id;
+ }
+ }
+ goto exit;
+
+leaf_add_err:
+ if (!vsi_node->first_child) {
+ if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE))
+ goto exit; /* VSI node stays in the tree if HW refuses the delete */
+ ws_tree_del_node(vsi_node);
+ irdma_free_node(vsi, vsi_node);
+ }
+
+vsi_add_err:
+ /* Free head node if there are no remaining VSI nodes */
+ if (!ws_tree_root->first_child) {
+ irdma_ws_cqp_cmd(vsi, ws_tree_root,
+ IRDMA_OP_WS_DELETE_NODE);
+ vsi->dev->ws_tree_root = NULL;
+ ws_tree_del_node(ws_tree_root);
+ irdma_free_node(vsi, ws_tree_root);
+ }
+
+exit:
+ mutex_unlock(&vsi->dev->ws_mutex);
+ return ret;
+
+reg_err: /* leaf is already linked in: unlock, then undo via irdma_ws_remove() which takes the mutex itself */
+ mutex_unlock(&vsi->dev->ws_mutex);
+ irdma_ws_remove(vsi, user_pri);
+ return ret;
+}
+
+/**
+ * irdma_ws_remove - Free WS scheduler node, update WS tree
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ *
+ * The leaf is only removed when irdma_tc_in_use() reports that its traffic
+ * class has no users left. The check and the removal both run with
+ * vsi->dev->ws_mutex held.
+ */
+void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+	mutex_lock(&vsi->dev->ws_mutex);
+
+	if (!irdma_tc_in_use(vsi, user_pri))
+		irdma_remove_leaf(vsi, user_pri);
+
+	mutex_unlock(&vsi->dev->ws_mutex);
+}
+
+/**
+ * irdma_ws_reset - Reset entire WS tree
+ * @vsi: vsi pointer
+ *
+ * Removes the leaf of every user priority of @vsi, without checking whether
+ * it is still in use, while holding vsi->dev->ws_mutex.
+ */
+void irdma_ws_reset(struct irdma_sc_vsi *vsi)
+{
+	u8 user_pri = 0;
+
+	mutex_lock(&vsi->dev->ws_mutex);
+	while (user_pri < IRDMA_MAX_USER_PRIORITY) {
+		irdma_remove_leaf(vsi, user_pri);
+		++user_pri;
+	}
+	mutex_unlock(&vsi->dev->ws_mutex);
+}
diff --git a/drivers/infiniband/hw/irdma/ws.h b/drivers/infiniband/hw/irdma/ws.h
new file mode 100644
index 0000000..fe0474d
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ws.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_WS_H
+#define IRDMA_WS_H
+
+#include "osdep.h"
+
+enum irdma_ws_node_type {
+ WS_NODE_TYPE_PARENT, /* used by ws.c for the tree root and the per-VSI nodes */
+ WS_NODE_TYPE_LEAF, /* used by ws.c for the per traffic class nodes */
+};
+
+enum irdma_ws_match_type {
+ WS_MATCH_TYPE_VSI, /* compare a node's vsi_index */
+ WS_MATCH_TYPE_TC, /* compare a node's traffic_class */
+};
+
+struct irdma_ws_node {
+ struct list_head siblings; /* links the nodes that share a parent */
+ struct irdma_ws_node *first_child; /* NULL when the node has no children */
+ struct irdma_ws_node *parent; /* NULL for the tree root */
+ u64 lan_qs_handle; /* opaque handle used by LAN */
+ u32 l2_sched_node_id;
+ u16 index; /* WS node id, 0 when none was allocated */
+ u16 qs_handle; /* taken from the CQP add-node result for leaf nodes */
+ u16 vsi_index;
+ u8 traffic_class;
+ u8 user_pri;
+ u8 rel_bw; /* relative bandwidth, sent to the CQP as the node weight */
+ u8 abstraction_layer; /* used for splitting a TC */
+ u8 prio_type;
+ bool type_leaf; /* true for leaf (traffic class) nodes */
+ bool enable;
+};
+
+enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri);
+void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri);
+void irdma_ws_reset(struct irdma_sc_vsi *vsi);
+#endif /* IRDMA_WS_H */
--
1.8.3.1
^ permalink raw reply related
* [RFC v1 14/19] RDMA/irdma: Add user/kernel shared libraries
From: Shiraz Saleem @ 2019-02-15 17:11 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: Mustafa Ismail <mustafa.ismail@intel.com>
Building the WQE descriptors for different verb
operations is similar in kernel and user-space.
Add these shared libraries.
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/hw/irdma/uk.c | 1680 ++++++++++++++++++++++++++++++++++++
drivers/infiniband/hw/irdma/user.h | 463 ++++++++++
2 files changed, 2143 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/uk.c
create mode 100644 drivers/infiniband/hw/irdma/user.h
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
new file mode 100644
index 0000000..d71d0d8
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -0,0 +1,1680 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "osdep.h"
+#include "status.h"
+#include "defs.h"
+#include "user.h"
+#include "irdma.h"
+
+/**
+ * irdma_set_fragment - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: byte offset of the fragment inside @wqe
+ * @sge: sge length and stag, NULL to write a fragment that only carries the
+ *	 valid bit
+ * @valid: value for the valid bit stored with the fragment
+ */
+static void irdma_set_fragment(__le64 *wqe,
+			       u32 offset,
+			       struct irdma_sge *sge,
+			       u8 valid)
+{
+	u64 frag_ctrl = LS_64(valid, IRDMAQPSQ_VALID);
+
+	if (!sge) {
+		set_64bit_val(wqe, offset, 0);
+		set_64bit_val(wqe, offset + 8, frag_ctrl);
+		return;
+	}
+
+	frag_ctrl |= LS_64(sge->len, IRDMAQPSQ_FRAG_LEN) |
+		     LS_64(sge->stag, IRDMAQPSQ_FRAG_STAG);
+
+	set_64bit_val(wqe, offset, LS_64(sge->tag_off, IRDMAQPSQ_FRAG_TO));
+	set_64bit_val(wqe, offset + 8, frag_ctrl);
+}
+
+/**
+ * irdma_set_fragment_gen_1 - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: byte offset of the fragment inside @wqe
+ * @sge: sge length and stag, NULL to clear the fragment
+ * @valid: not used by this variant
+ */
+static void irdma_set_fragment_gen_1(__le64 *wqe,
+				     u32 offset,
+				     struct irdma_sge *sge,
+				     u8 valid)
+{
+	if (!sge) {
+		set_64bit_val(wqe, offset, 0);
+		set_64bit_val(wqe, offset + 8, 0);
+		return;
+	}
+
+	set_64bit_val(wqe, offset, LS_64(sge->tag_off, IRDMAQPSQ_FRAG_TO));
+	set_64bit_val(wqe, offset + 8,
+		      LS_64(sge->len, IRDMAQPSQ_GEN1_FRAG_LEN) |
+		      LS_64(sge->stag, IRDMAQPSQ_GEN1_FRAG_STAG));
+}
+
+/**
+ * irdma_nop_1 - insert a NOP wqe
+ * @qp: hw qp ptr
+ *
+ * Writes a NOP WQE of IRDMA_QP_WQE_MIN_QUANTA at the current SQ head. The
+ * ring head itself is not advanced here.
+ *
+ * Return: 0 on success, IRDMA_ERR_PARAM if the SQ ring head is 0
+ */
+static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp)
+{
+ u64 hdr;
+ __le64 *wqe;
+ u32 wqe_idx;
+ bool signaled = false;
+
+ if (!qp->sq_ring.head)
+ return IRDMA_ERR_PARAM;
+
+ wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ wqe = qp->sq_base[wqe_idx].elem;
+
+ qp->sq_wrtrk_array[wqe_idx].quanta = IRDMA_QP_WQE_MIN_QUANTA;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = LS_64(IRDMAQP_OP_NOP, IRDMAQPSQ_OPCODE) |
+ LS_64(signaled, IRDMAQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+ /* make sure WQE is written before valid bit is set */
+ wmb();
+
+ set_64bit_val(wqe, 24, hdr); /* header with the valid bit goes in last */
+
+ return 0;
+}
+
+/**
+ * irdma_qp_post_wr - ring doorbell
+ * @qp: hw qp ptr
+ *
+ * Compares the SQ head with the head recorded at the previous call
+ * (qp->initial_ring.head) and with the hardware SQ tail read from the
+ * doorbell shadow area. The doorbell is written when the QP was in push mode
+ * (which is then cleared), or when the hardware tail lies inside the window
+ * [recorded head, current head), allowing for ring wrap. Nothing is written
+ * if the SQ head did not move since the previous call. The current SQ head
+ * is recorded before returning.
+ */
+void irdma_qp_post_wr(struct irdma_qp_uk *qp)
+{
+ u64 temp;
+ u32 hw_sq_tail;
+ u32 sw_sq_head;
+
+ /* valid bit is written and loads completed before reading shadow */
+ mb();
+
+ /* read the doorbell shadow area */
+ get_64bit_val(qp->shadow_area, 0, &temp);
+
+ hw_sq_tail = (u32)RS_64(temp, IRDMA_QP_DBSA_HW_SQ_TAIL);
+ sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ if (sw_sq_head != qp->initial_ring.head) {
+ if (qp->push_mode) {
+ writel(qp->qp_id, qp->wqe_alloc_db);
+ qp->push_mode = false;
+ } else if (sw_sq_head != hw_sq_tail) {
+ if (sw_sq_head > qp->initial_ring.head) { /* no wrap since last call */
+ if (hw_sq_tail >= qp->initial_ring.head &&
+ hw_sq_tail < sw_sq_head) {
+ writel(qp->qp_id, qp->wqe_alloc_db);
+ }
+ } else { /* head wrapped around the ring */
+ if (hw_sq_tail >= qp->initial_ring.head ||
+ hw_sq_tail < sw_sq_head) {
+ writel(qp->qp_id, qp->wqe_alloc_db);
+ }
+ }
+ }
+ }
+
+ qp->initial_ring.head = qp->sq_ring.head;
+}
+
+/**
+ * irdma_qp_ring_push_db - ring qp doorbell
+ * @qp: hw qp ptr
+ * @wqe_idx: wqe index
+ *
+ * Writes the push doorbell with @wqe_idx >> 3 in the WQE descriptor index
+ * field combined with the QP id, records the current SQ head in
+ * qp->initial_ring and marks the QP as being in push mode.
+ */
+static void irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx)
+{
+ set_32bit_val(qp->push_db,
+ 0,
+ LS_32(wqe_idx >> 3, IRDMA_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id);
+ qp->initial_ring.head = qp->sq_ring.head;
+ qp->push_mode = true;
+}
+
+/**
+ * irdma_qp_push_wqe - copy a WQE to the push page or fall back to a doorbell
+ * @qp: hw qp ptr
+ * @wqe: WQE that was built in the SQ
+ * @quanta: size of the WQE in quanta
+ * @wqe_idx: SQ index of the WQE
+ * @post_sq: ring the regular doorbell when the push path is not taken
+ *
+ * The push path is taken when the QP is already in push mode or when the
+ * head recorded in qp->initial_ring equals the SQ tail. Otherwise the WQE is
+ * left to the regular doorbell, rung only if @post_sq is set.
+ */
+void irdma_qp_push_wqe(struct irdma_qp_uk *qp,
+		       __le64 *wqe,
+		       u16 quanta,
+		       u32 wqe_idx,
+		       bool post_sq)
+{
+	__le64 *push_slot;
+
+	if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) ==
+	    IRDMA_RING_CURRENT_TAIL(qp->sq_ring) ||
+	    qp->push_mode) {
+		push_slot = (__le64 *)((uintptr_t)qp->push_wqe +
+				       (wqe_idx & 0x7) * 0x20);
+		memcpy(push_slot, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE);
+		irdma_qp_ring_push_db(qp, wqe_idx);
+		return;
+	}
+
+	if (post_sq)
+		irdma_qp_post_wr(qp);
+}
+
+/**
+ * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ * @quanta: size of WR in quanta
+ * @total_size: size of WR in bytes
+ * @info: info on WR
+ *
+ * If the WR does not fit into what is left of the current max_hw_sq_chunk
+ * sized chunk of the SQ, the remainder of the chunk is filled with NOP WQEs
+ * first. The SQ head is advanced past the padding and the WR, and the WR is
+ * recorded in qp->sq_wrtrk_array.
+ *
+ * Return: pointer to the WQE to fill in, or NULL if the SQ has no room for
+ * the WR plus any padding
+ */
+__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp,
+				   u32 *wqe_idx,
+				   u16 quanta,
+				   u32 total_size,
+				   struct irdma_post_sq_info *info)
+{
+	__le64 *wqe;
+	u32 nop_wqe_idx;
+	u16 nop_cnt;
+	u16 i;
+
+	/* quanta left until the next chunk boundary, 0 if already on one */
+	nop_cnt = IRDMA_RING_CURRENT_HEAD(qp->sq_ring) % qp->hw_attrs->max_hw_sq_chunk;
+	if (nop_cnt)
+		nop_cnt = qp->hw_attrs->max_hw_sq_chunk - nop_cnt;
+
+	if (quanta > nop_cnt) {
+		/* Need to pad with NOP */
+		/* Make sure SQ has room for nop_cnt + quanta */
+		if ((u32)(quanta + nop_cnt) > IRDMA_RING_FREE_QUANTA(qp->sq_ring))
+			return NULL;
+
+		/* pad with NOP */
+		nop_wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+		for (i = 0; i < nop_cnt; i++) {
+			irdma_nop_1(qp);
+			IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
+		}
+		if (qp->push_db && nop_cnt && info->push_wqe) {
+			irdma_qp_push_wqe(qp,
+					  qp->sq_base[nop_wqe_idx].elem,
+					  nop_cnt, nop_wqe_idx, true);
+		}
+	} else {
+		/* no need to pad with NOP */
+		if (quanta > IRDMA_RING_FREE_QUANTA(qp->sq_ring))
+			return NULL;
+	}
+
+	*wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+	if (!*wqe_idx)
+		qp->swqe_polarity = !qp->swqe_polarity;
+
+	IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
+	wqe = qp->sq_base[*wqe_idx].elem;
+	if (qp->hw_attrs->hw_rev == IRDMA_GEN_1 &&
+	    quanta == 1 &&
+	    (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) & 1)) {
+		__le64 *wqe_0 = qp->sq_base[IRDMA_RING_CURRENT_HEAD(qp->sq_ring)].elem;
+
+		/*
+		 * Mark the following WQE as not valid. Go through
+		 * set_64bit_val() like every other WQE store instead of
+		 * writing a CPU-order value through a u64 alias of the
+		 * __le64 WQE memory; byte offset 24 is 64-bit word 3.
+		 */
+		set_64bit_val(wqe_0, 24,
+			      LS_64(!qp->swqe_polarity, IRDMAQPSQ_VALID));
+	}
+	qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
+	qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
+
+	return wqe;
+}
+
+/**
+ * irdma_qp_get_next_recv_wqe - get next qp's rcv wqe
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ *
+ * Return: pointer to the RQ WQE for the claimed index, or NULL if the RQ
+ * ring is full or its head could not be advanced
+ */
+__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx)
+{
+ __le64 *wqe;
+ enum irdma_status_code ret_code;
+
+ if (IRDMA_RING_FULL_ERR(qp->rq_ring))
+ return NULL;
+
+ IRDMA_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ if (!*wqe_idx)
+ qp->rwqe_polarity = !qp->rwqe_polarity;
+ /* rq_wqe_size_multiplier is the number of 32 byte quanta in one rq wqe */
+ wqe = qp->rq_base[*wqe_idx * (qp->rq_wqe_size_multiplier)].elem;
+
+ return wqe;
+}
+
+/**
+ * irdma_rdma_write - rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ *
+ * With immediate data the slot at byte offset 0 of the WQE carries the
+ * immediate value instead of the first SGE, so every SGE is placed from byte
+ * offset 32 on and the fragment count grows by one.
+ *
+ * Return: 0 on success, IRDMA_ERR_INVALID_FRAG_COUNT if there are more SGEs
+ * than qp->max_sq_frag_cnt, IRDMA_ERR_QP_INVALID_MSG_SIZE if the total
+ * length exceeds IRDMA_MAX_OUTBOUND_MSG_SIZE, the error from
+ * irdma_fragcnt_to_quanta_sq(), or IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no
+ * SQ WQE could be obtained
+ */
+static enum irdma_status_code irdma_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ u64 hdr;
+ __le64 *wqe;
+ struct irdma_rdma_write *op_info;
+ u32 i, wqe_idx;
+ u32 total_size = 0, byte_off;
+ enum irdma_status_code ret_code;
+ u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ u16 quanta;
+
+ info->push_wqe = qp->push_db ? true : false;
+
+ op_info = &info->op.rdma_write;
+ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
+ return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].len;
+
+ if (total_size > IRDMA_MAX_OUTBOUND_MSG_SIZE)
+ return IRDMA_ERR_QP_INVALID_MSG_SIZE;
+
+ read_fence |= info->read_fence;
+
+ if (info->imm_data_valid)
+ frag_cnt = op_info->num_lo_sges + 1;
+ else
+ frag_cnt = op_info->num_lo_sges;
+ addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0;
+ ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta,
+ total_size, info);
+ if (!wqe)
+ return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+ set_64bit_val(wqe,
+ 16,
+ LS_64(op_info->rem_addr.tag_off, IRDMAQPSQ_FRAG_TO));
+
+ if (info->imm_data_valid) {
+ set_64bit_val(wqe, 0,
+ LS_64(info->imm_data, IRDMAQPSQ_IMMDATA));
+ i = 0; /* all SGEs go into the additional fragment slots */
+ } else {
+ qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->lo_sg_list,
+ qp->swqe_polarity);
+ i = 1; /* first SGE already placed at offset 0 */
+ }
+
+ for (byte_off = 32; i < op_info->num_lo_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->hw_attrs->hw_rev > IRDMA_GEN_1 && !(frag_cnt & 0x01) &&
+ frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->hw_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ if (!op_info->rem_addr.stag && !total_size)
+ op_info->rem_addr.stag = 0x1234; /* NOTE(review): placeholder stag for a zero-length write without stag; written back into the caller's info - origin of the value not evident here */
+ hdr = LS_64(op_info->rem_addr.stag, IRDMAQPSQ_REMSTAG) |
+ LS_64(info->op_type, IRDMAQPSQ_OPCODE) |
+ LS_64((info->imm_data_valid ? 1 : 0), IRDMAQPSQ_IMMDATAFLAG) |
+ LS_64((info->report_rtt ? 1 : 0), IRDMAQPSQ_REPORTRTT) |
+ LS_64(addl_frag_cnt, IRDMAQPSQ_ADDFRAGCNT) |
+ LS_64((info->push_wqe ? 1 : 0), IRDMAQPSQ_PUSHWQE) |
+ LS_64(read_fence, IRDMAQPSQ_READFENCE) |
+ LS_64(info->local_fence, IRDMAQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, IRDMAQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ if (info->push_wqe) {
+ irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
+ } else {
+ if (post_sq)
+ irdma_qp_post_wr(qp);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_rdma_read - rdma read command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @inv_stag: flag for inv_stag
+ * @post_sq: flag to post sq
+ *
+ * @inv_stag selects the IRDMAQP_OP_RDMA_READ_LOC_INV opcode instead of
+ * IRDMAQP_OP_RDMA_READ. The read fence bit is set when either
+ * info->read_fence or qp->force_fence is set.
+ * NOTE(review): unlike irdma_rdma_write() the total length is not checked
+ * against IRDMA_MAX_OUTBOUND_MSG_SIZE here - confirm this is intended.
+ *
+ * Return: 0 on success, IRDMA_ERR_INVALID_FRAG_COUNT if there are more SGEs
+ * than qp->max_sq_frag_cnt, the error from irdma_fragcnt_to_quanta_sq(), or
+ * IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no SQ WQE could be obtained
+ */
+static enum irdma_status_code irdma_rdma_read(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool inv_stag,
+ bool post_sq)
+{
+ struct irdma_rdma_read *op_info;
+ enum irdma_status_code ret_code;
+ u32 i, byte_off, total_size = 0;
+ bool local_fence = false;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u32 wqe_idx;
+ u16 quanta;
+ u64 hdr;
+
+ info->push_wqe = qp->push_db ? true : false;
+
+ op_info = &info->op.rdma_read;
+ if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
+ return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].len;
+
+ ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta,
+ total_size, info);
+ if (!wqe)
+ return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+ addl_frag_cnt = op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0;
+ local_fence |= info->local_fence;
+
+ qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->lo_sg_list,
+ qp->swqe_polarity);
+ for (i = 1, byte_off = 32; i < op_info->num_lo_sges; ++i) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->hw_attrs->hw_rev > IRDMA_GEN_1 &&
+ !(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->hw_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+ set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, IRDMAQPSQ_FRAG_TO));
+ hdr = LS_64(op_info->rem_addr.stag, IRDMAQPSQ_REMSTAG) |
+ LS_64((info->report_rtt ? 1 : 0), IRDMAQPSQ_REPORTRTT) |
+ LS_64(addl_frag_cnt, IRDMAQPSQ_ADDFRAGCNT) |
+ LS_64((inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ), IRDMAQPSQ_OPCODE) |
+ LS_64((info->push_wqe ? 1 : 0), IRDMAQPSQ_PUSHWQE) |
+ LS_64(info->read_fence || qp->force_fence ? 1 : 0, IRDMAQPSQ_READFENCE) |
+ LS_64(local_fence, IRDMAQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, IRDMAQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ if (info->push_wqe) {
+ irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
+ } else {
+ if (post_sq)
+ irdma_qp_post_wr(qp);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ *
+ * Builds a send WQE from info->op.send. When immediate data is valid it
+ * takes the first qword of the WQE and counts as one extra fragment.
+ * Returns 0 on success, IRDMA_ERR_INVALID_FRAG_COUNT for an unsupported
+ * fragment count, and IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no send WQE
+ * could be obtained.
+ */
+static enum irdma_status_code irdma_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_post_send *op_info;
+ u64 hdr;
+ u32 i, wqe_idx, total_size = 0, byte_off;
+ enum irdma_status_code ret_code;
+ u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ u16 quanta;
+
+ /* push mode is requested whenever the QP has a push doorbell */
+ info->push_wqe = qp->push_db ? true : false;
+
+ op_info = &info->op.send;
+ if (qp->max_sq_frag_cnt < op_info->num_sges)
+ return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+ /* the summed SGE length is handed to the WQE allocator below */
+ for (i = 0; i < op_info->num_sges; i++)
+ total_size += op_info->sg_list[i].len;
+
+ /* immediate data consumes one fragment slot */
+ if (info->imm_data_valid)
+ frag_cnt = op_info->num_sges + 1;
+ else
+ frag_cnt = op_info->num_sges;
+ ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta,
+ total_size, info);
+ if (!wqe)
+ return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+ read_fence |= info->read_fence;
+ addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0;
+ /*
+ * Offset 0 holds either the immediate data or the first SGE; i is set
+ * to the index of the first SGE that still has to be written at byte 32.
+ */
+ if (info->imm_data_valid) {
+ set_64bit_val(wqe, 0,
+ LS_64(info->imm_data, IRDMAQPSQ_IMMDATA));
+ i = 0;
+ } else {
+ qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->sg_list,
+ qp->swqe_polarity);
+ i = 1;
+ }
+
+ for (byte_off = 32; i < op_info->num_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->hw_attrs->hw_rev > IRDMA_GEN_1 && !(frag_cnt & 0x01) &&
+ frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ /* only GEN_2 counts the padding fragment in ADDFRAGCNT */
+ if (qp->hw_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe,
+ 16,
+ LS_64(op_info->qkey, IRDMAQPSQ_DESTQKEY) |
+ LS_64(op_info->dest_qp, IRDMAQPSQ_DESTQPN));
+ hdr = LS_64(info->stag_to_inv, IRDMAQPSQ_REMSTAG) |
+ LS_64(op_info->ah_id, IRDMAQPSQ_AHID) |
+ LS_64((info->imm_data_valid ? 1 : 0), IRDMAQPSQ_IMMDATAFLAG) |
+ LS_64((info->report_rtt ? 1 : 0), IRDMAQPSQ_REPORTRTT) |
+ LS_64(info->op_type, IRDMAQPSQ_OPCODE) |
+ LS_64(addl_frag_cnt, IRDMAQPSQ_ADDFRAGCNT) |
+ LS_64((info->push_wqe ? 1 : 0), IRDMAQPSQ_PUSHWQE) |
+ LS_64(read_fence, IRDMAQPSQ_READFENCE) |
+ LS_64(info->local_fence, IRDMAQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, IRDMAQPSQ_SIGCOMPL) |
+ LS_64(info->udp_hdr, IRDMAQPSQ_UDPHEADER) |
+ LS_64(info->l4len, IRDMAQPSQ_L4LEN) |
+ LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ /* the header qword at offset 24 carries the valid bit, so it goes last */
+ set_64bit_val(wqe, 24, hdr);
+ if (info->push_wqe) {
+ irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
+ } else {
+ if (post_sq)
+ irdma_qp_post_wr(qp);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_set_mw_bind_wqe_gen_1 - fill the memory window bind fields of a wqe
+ * @wqe: wqe to populate
+ * @op_info: bind window parameters
+ *
+ * Writes the VA at offset 0, the packed stag pair at offset 8 and the bind
+ * length at offset 16.
+ *
+ * NOTE(review): mw_stag/mr_stag land in the opposite fields compared with
+ * irdma_set_mw_bind_wqe(); presumably intentional for this hardware
+ * revision - confirm against the hardware spec.
+ */
+static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe,
+					struct irdma_bind_window *op_info)
+{
+	u64 stags = LS_64(op_info->mw_stag, IRDMAQPSQ_PARENTMRSTAG) |
+		    LS_64(op_info->mr_stag, IRDMAQPSQ_MWSTAG);
+
+	set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+	set_64bit_val(wqe, 8, stags);
+	set_64bit_val(wqe, 16, op_info->bind_len);
+}
+
+/**
+ * irdma_copy_inline_data_gen_1 - place inline payload into a wqe
+ * @dest: start of the wqe
+ * @src: inline payload
+ * @len: number of payload bytes
+ * @polarity: unused; kept so the signature matches irdma_copy_inline_data()
+ *
+ * The first 16 bytes go to the start of the wqe; anything beyond that
+ * continues at byte offset 32.
+ */
+static void irdma_copy_inline_data_gen_1(u8 *dest, u8 *src, u32 len, u8 polarity)
+{
+	u32 head_len = len <= 16 ? len : 16;
+
+	memcpy(dest, src, head_len);
+	if (len > 16)
+		memcpy(dest + 32, src + 16, len - 16);
+}
+
+/**
+ * irdma_inline_data_size_to_quanta_gen_1 - quanta needed for inline data
+ * @data_size: inline payload size
+ * @quanta: returned sq wqe size in quanta
+ * @max_size: largest inline size allowed
+ *
+ * Up to 16 bytes fit in the minimum wqe; anything larger takes 2 quanta.
+ * Returns IRDMA_ERR_INVALID_INLINE_DATA_SIZE when @data_size exceeds
+ * @max_size, 0 otherwise.
+ */
+static enum irdma_status_code irdma_inline_data_size_to_quanta_gen_1(u32 data_size,
+								      u16 *quanta,
+								      u32 max_size)
+{
+	if (data_size > max_size)
+		return IRDMA_ERR_INVALID_INLINE_DATA_SIZE;
+
+	*quanta = data_size <= 16 ? IRDMA_QP_WQE_MIN_QUANTA : 2;
+
+	return 0;
+}
+
+/**
+ * irdma_set_mw_bind_wqe - fill the memory window bind fields of a wqe
+ * @wqe: wqe to populate
+ * @op_info: bind window parameters
+ *
+ * Writes the VA at offset 0, the packed parent-MR/MW stag pair at offset 8
+ * and the bind length at offset 16.
+ */
+static void irdma_set_mw_bind_wqe(__le64 *wqe,
+				  struct irdma_bind_window *op_info)
+{
+	u64 stags = LS_64(op_info->mr_stag, IRDMAQPSQ_PARENTMRSTAG) |
+		    LS_64(op_info->mw_stag, IRDMAQPSQ_MWSTAG);
+
+	set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+	set_64bit_val(wqe, 8, stags);
+	set_64bit_val(wqe, 16, op_info->bind_len);
+}
+
+/**
+ * irdma_copy_inline_data - Copy inline data to wqe
+ * @dest: pointer to wqe
+ * @src: pointer to inline data
+ * @len: length of inline data to copy
+ * @polarity: polarity of wqe valid bit
+ *
+ * The first 8 payload bytes are stored at byte offset 8 of the wqe. Any
+ * remaining payload is written into the following 32-byte quanta, 31 bytes
+ * at a time, with the last byte of each such quantum holding the inline
+ * valid bit derived from @polarity.
+ */
+static void irdma_copy_inline_data(u8 *dest, u8 *src, u32 len, u8 polarity)
+{
+	u8 inline_valid = polarity << IRDMA_INLINE_VALID_S;
+	u32 copy_size;
+
+	dest += 8;
+	if (len <= 8) {
+		memcpy(dest, src, len);
+		return;
+	}
+
+	/*
+	 * @src is a caller-supplied byte buffer with no alignment guarantee,
+	 * so copy the leading 8 bytes with memcpy() rather than through a
+	 * type-punned u64 load/store.
+	 */
+	memcpy(dest, src, 8);
+	len -= 8;
+	src += 8;
+	dest += 24; /* point to additional 32 byte quanta */
+
+	while (len) {
+		copy_size = len < 31 ? len : 31;
+		memcpy(dest, src, copy_size);
+		/* byte 31 of every additional quantum carries the valid bit */
+		*(dest + 31) = inline_valid;
+		len -= copy_size;
+		dest += 32;
+		src += copy_size;
+	}
+}
+
+/**
+ * irdma_inline_data_size_to_quanta - quanta needed for inline data
+ * @data_size: inline payload size
+ * @quanta: returned sq wqe size in quanta
+ * @max_size: largest inline size allowed
+ *
+ * The minimum wqe holds up to 8 inline bytes and every further quantum
+ * holds 31 more (thresholds 8, 39, 70, ... 194); anything above 194 bytes
+ * uses 8 quanta. Returns IRDMA_ERR_INVALID_INLINE_DATA_SIZE when
+ * @data_size exceeds @max_size, 0 otherwise.
+ */
+static enum irdma_status_code irdma_inline_data_size_to_quanta(u32 data_size,
+								u16 *quanta,
+								u32 max_size)
+{
+	if (data_size > max_size)
+		return IRDMA_ERR_INVALID_INLINE_DATA_SIZE;
+
+	if (data_size <= 8) {
+		*quanta = IRDMA_QP_WQE_MIN_QUANTA;
+		return 0;
+	}
+
+	if (data_size > 194) {
+		*quanta = 8;
+		return 0;
+	}
+
+	/* one base quantum plus the remaining bytes rounded up to 31-byte units */
+	*quanta = 1 + (data_size - 8 + 30) / 31;
+
+	return 0;
+}
+
+/**
+ * irdma_inline_rdma_write - inline rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ *
+ * Builds an RDMA write WQE whose payload is copied into the WQE itself.
+ * Returns 0 on success, the error from the per-generation
+ * iw_inline_data_size_to_quanta() hook when the length is not accepted, and
+ * IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no send WQE could be obtained.
+ */
+static enum irdma_status_code
+irdma_inline_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_inline_rdma_write *op_info;
+ u64 hdr = 0;
+ u32 wqe_idx;
+ enum irdma_status_code ret_code;
+ bool read_fence = false;
+ u16 quanta;
+
+ /* push mode is requested whenever the QP has a push doorbell */
+ info->push_wqe = qp->push_db ? true : false;
+ op_info = &info->op.inline_rdma_write;
+ ret_code = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, &quanta,
+ qp->hw_attrs->max_hw_inline);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta,
+ op_info->len, info);
+ if (!wqe)
+ return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+ read_fence |= info->read_fence;
+ set_64bit_val(wqe, 16,
+ LS_64(op_info->rem_addr.tag_off, IRDMAQPSQ_FRAG_TO));
+
+ hdr = LS_64(op_info->rem_addr.stag, IRDMAQPSQ_REMSTAG) |
+ LS_64(info->op_type, IRDMAQPSQ_OPCODE) |
+ LS_64(op_info->len, IRDMAQPSQ_INLINEDATALEN) |
+ LS_64((info->report_rtt ? 1 : 0), IRDMAQPSQ_REPORTRTT) |
+ LS_64(1, IRDMAQPSQ_INLINEDATAFLAG) |
+ LS_64((info->imm_data_valid ? 1 : 0), IRDMAQPSQ_IMMDATAFLAG) |
+ LS_64((info->push_wqe ? 1 : 0), IRDMAQPSQ_PUSHWQE) |
+ LS_64(read_fence, IRDMAQPSQ_READFENCE) |
+ LS_64(info->local_fence, IRDMAQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, IRDMAQPSQ_SIGCOMPL) |
+ LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+ /*
+ * NOTE(review): irdma_copy_inline_data_gen_1() starts copying at offset 0,
+ * the same qword as the immediate data written here - confirm immediate
+ * data is never combined with inline data on GEN_1.
+ */
+ if (info->imm_data_valid)
+ set_64bit_val(wqe, 0,
+ LS_64(info->imm_data, IRDMAQPSQ_IMMDATA));
+
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len,
+ qp->swqe_polarity);
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ /* the header qword at offset 24 carries the valid bit, so it goes last */
+ set_64bit_val(wqe, 24, hdr);
+
+ if (info->push_wqe) {
+ irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
+ } else {
+ if (post_sq)
+ irdma_qp_post_wr(qp);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ *
+ * Builds a send WQE whose payload is copied into the WQE itself. Returns 0
+ * on success, the error from the per-generation
+ * iw_inline_data_size_to_quanta() hook when the length is not accepted, and
+ * IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no send WQE could be obtained.
+ */
+static enum irdma_status_code irdma_inline_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_post_inline_send *op_info;
+ u64 hdr;
+ u32 wqe_idx;
+ enum irdma_status_code ret_code;
+ bool read_fence = false;
+ u16 quanta;
+
+ /* push mode is requested whenever the QP has a push doorbell */
+ info->push_wqe = qp->push_db ? true : false;
+ op_info = &info->op.inline_send;
+
+ ret_code = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len, &quanta,
+ qp->hw_attrs->max_hw_inline);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta,
+ op_info->len, info);
+ if (!wqe)
+ return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+ set_64bit_val(wqe, 16,
+ LS_64(op_info->qkey, IRDMAQPSQ_DESTQKEY) |
+ LS_64(op_info->dest_qp, IRDMAQPSQ_DESTQPN));
+
+ read_fence |= info->read_fence;
+ hdr = LS_64(info->stag_to_inv, IRDMAQPSQ_REMSTAG) |
+ LS_64(op_info->ah_id, IRDMAQPSQ_AHID) |
+ LS_64(info->op_type, IRDMAQPSQ_OPCODE) |
+ LS_64(op_info->len, IRDMAQPSQ_INLINEDATALEN) |
+ LS_64((info->imm_data_valid ? 1 : 0), IRDMAQPSQ_IMMDATAFLAG) |
+ LS_64((info->report_rtt ? 1 : 0), IRDMAQPSQ_REPORTRTT) |
+ LS_64(1, IRDMAQPSQ_INLINEDATAFLAG) |
+ LS_64((info->push_wqe ? 1 : 0), IRDMAQPSQ_PUSHWQE) |
+ LS_64(read_fence, IRDMAQPSQ_READFENCE) |
+ LS_64(info->local_fence, IRDMAQPSQ_LOCALFENCE) |
+ LS_64(info->signaled, IRDMAQPSQ_SIGCOMPL) |
+ LS_64(info->udp_hdr, IRDMAQPSQ_UDPHEADER) |
+ LS_64(info->l4len, IRDMAQPSQ_L4LEN) |
+ LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+ /*
+ * NOTE(review): irdma_copy_inline_data_gen_1() starts copying at offset 0,
+ * the same qword as the immediate data written here - confirm immediate
+ * data is never combined with inline data on GEN_1.
+ */
+ if (info->imm_data_valid)
+ set_64bit_val(wqe, 0,
+ LS_64(info->imm_data, IRDMAQPSQ_IMMDATA));
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len, qp->swqe_polarity);
+
+ wmb(); /* make sure WQE is populated before valid bit is set */
+
+ /* the header qword at offset 24 carries the valid bit, so it goes last */
+ set_64bit_val(wqe, 24, hdr);
+
+ if (info->push_wqe) {
+ irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
+ } else {
+ if (post_sq)
+ irdma_qp_post_wr(qp);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_stag_local_invalidate - post a local stag invalidate
+ * @qp: hw qp ptr
+ * @info: post sq information; info->op.inv_local_stag names the target stag
+ * @post_sq: flag to post sq
+ *
+ * Uses a single minimum-size wqe. Returns 0 on success or
+ * IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no send wqe could be obtained.
+ */
+static enum irdma_status_code
+irdma_stag_local_invalidate(struct irdma_qp_uk *qp,
+			    struct irdma_post_sq_info *info,
+			    bool post_sq)
+{
+	struct irdma_inv_local_stag *inv = &info->op.inv_local_stag;
+	struct irdma_sge sge = {};
+	bool local_fence;
+	__le64 *wqe;
+	u32 wqe_idx;
+	u64 hdr;
+
+	/* push mode is requested whenever the QP has a push doorbell */
+	info->push_wqe = !!qp->push_db;
+	local_fence = info->local_fence;
+
+	wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
+					 0, info);
+	if (!wqe)
+		return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+	/* only the stag of the otherwise zeroed fragment is meaningful */
+	sge.stag = inv->target_stag;
+	qp->wqe_ops.iw_set_fragment(wqe, 0, &sge, 0);
+	set_64bit_val(wqe, 16, 0);
+
+	hdr = LS_64(IRDMA_OP_TYPE_INV_STAG, IRDMAQPSQ_OPCODE) |
+	      LS_64((info->push_wqe ? 1 : 0), IRDMAQPSQ_PUSHWQE) |
+	      LS_64(info->read_fence, IRDMAQPSQ_READFENCE) |
+	      LS_64(local_fence, IRDMAQPSQ_LOCALFENCE) |
+	      LS_64(info->signaled, IRDMAQPSQ_SIGCOMPL) |
+	      LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, hdr);
+
+	if (info->push_wqe)
+		irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx,
+				  post_sq);
+	else if (post_sq)
+		irdma_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * irdma_mw_bind - post a memory window bind
+ * @qp: hw qp ptr
+ * @info: post sq information; info->op.bind_window describes the window
+ * @post_sq: flag to post sq
+ *
+ * Uses a single minimum-size wqe whose address/stag fields are filled by the
+ * per-generation iw_set_mw_bind_wqe hook. Returns 0 on success or
+ * IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no send wqe could be obtained.
+ */
+static enum irdma_status_code irdma_mw_bind(struct irdma_qp_uk *qp,
+					    struct irdma_post_sq_info *info,
+					    bool post_sq)
+{
+	struct irdma_bind_window *bind = &info->op.bind_window;
+	bool local_fence;
+	__le64 *wqe;
+	u32 wqe_idx;
+	u64 hdr;
+
+	/* push mode is requested whenever the QP has a push doorbell */
+	info->push_wqe = !!qp->push_db;
+	local_fence = info->local_fence;
+
+	wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx,
+					 IRDMA_QP_WQE_MIN_QUANTA, 0, info);
+	if (!wqe)
+		return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->wqe_ops.iw_set_mw_bind_wqe(wqe, bind);
+
+	/* access rights: reads enable bit 2, writes enable bit 3 */
+	hdr = LS_64(IRDMA_OP_TYPE_BIND_MW, IRDMAQPSQ_OPCODE) |
+	      LS_64(((bind->ena_reads << 2) | (bind->ena_writes << 3)),
+		    IRDMAQPSQ_STAGRIGHTS) |
+	      LS_64((bind->addressing_type == IRDMA_ADDR_TYPE_VA_BASED ? 1 : 0),
+		    IRDMAQPSQ_VABASEDTO) |
+	      LS_64((bind->mem_window_type_1 ? 1 : 0), IRDMAQPSQ_MEMWINDOWTYPE) |
+	      LS_64((info->push_wqe ? 1 : 0), IRDMAQPSQ_PUSHWQE) |
+	      LS_64(info->read_fence, IRDMAQPSQ_READFENCE) |
+	      LS_64(local_fence, IRDMAQPSQ_LOCALFENCE) |
+	      LS_64(info->signaled, IRDMAQPSQ_SIGCOMPL) |
+	      LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, hdr);
+
+	if (info->push_wqe)
+		irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx, post_sq);
+	else if (post_sq)
+		irdma_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * irdma_post_receive - post receive wqe
+ * @qp: hw qp ptr
+ * @info: post rq information
+ *
+ * Builds a receive WQE from info->sg_list and records info->wr_id for the
+ * slot. Returns 0 on success, IRDMA_ERR_INVALID_FRAG_COUNT when more SGEs
+ * are supplied than qp->max_rq_frag_cnt, and IRDMA_ERR_QP_TOOMANY_WRS_POSTED
+ * when no receive WQE could be obtained.
+ */
+static enum irdma_status_code
+irdma_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info)
+{
+	u32 wqe_idx, i, byte_off;
+	u32 addl_frag_cnt;
+	__le64 *wqe;
+	u64 hdr;
+
+	if (qp->max_rq_frag_cnt < info->num_sges)
+		return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+	wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
+	if (!wqe)
+		return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->rq_wrid_array[wqe_idx] = info->wr_id;
+	addl_frag_cnt = info->num_sges > 1 ? (info->num_sges - 1) : 0;
+
+	/* first SGE at offset 0; the rest start at byte 32 in 16-byte slots */
+	qp->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list,
+				    qp->rwqe_polarity);
+
+	for (i = 1, byte_off = 32; i < info->num_sges; i++) {
+		qp->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i],
+					    qp->rwqe_polarity);
+		byte_off += 16;
+	}
+
+	/* if not an odd number set valid bit in next fragment */
+	if (qp->hw_attrs->hw_rev > IRDMA_GEN_1 && !(info->num_sges & 0x01) &&
+	    info->num_sges) {
+		qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL, qp->rwqe_polarity);
+		/* only GEN_2 counts the padding fragment in ADDFRAGCNT */
+		if (qp->hw_attrs->hw_rev == IRDMA_GEN_2)
+			++addl_frag_cnt;
+	}
+
+	set_64bit_val(wqe, 16, 0);
+	hdr = LS_64(addl_frag_cnt, IRDMAQPSQ_ADDFRAGCNT) |
+	      LS_64(qp->rwqe_polarity, IRDMAQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, hdr);
+
+	return 0;
+}
+
+/**
+ * irdma_cq_request_notification - arm the cq and ring its doorbell
+ * @cq: hw cq
+ * @cq_notify: notification type
+ *
+ * Re-reads the arm state stored at offset 32 of the shadow area, bumps the
+ * arm sequence number, always sets the low bit of ARM_NEXT_SE, sets ARM_NEXT
+ * only for IRDMA_CQ_COMPL_EVENT, writes the result back and then writes the
+ * cq id to the cqe allocation doorbell.
+ */
+static void irdma_cq_request_notification(struct irdma_cq_uk *cq,
+					  enum irdma_cmpl_notify cq_notify)
+{
+	u8 arm_next = (cq_notify == IRDMA_CQ_COMPL_EVENT) ? 1 : 0;
+	u8 seq_num, next_se;
+	u16 cq_sel;
+	u64 shadow;
+
+	get_64bit_val(cq->shadow_area, 32, &shadow);
+	seq_num = (u8)RS_64(shadow, IRDMA_CQ_DBSA_ARM_SEQ_NUM) + 1;
+	cq_sel = (u16)RS_64(shadow, IRDMA_CQ_DBSA_SW_CQ_SELECT);
+	next_se = (u8)RS_64(shadow, IRDMA_CQ_DBSA_ARM_NEXT_SE) | 1;
+
+	shadow = LS_64(seq_num, IRDMA_CQ_DBSA_ARM_SEQ_NUM) |
+		 LS_64(cq_sel, IRDMA_CQ_DBSA_SW_CQ_SELECT) |
+		 LS_64(next_se, IRDMA_CQ_DBSA_ARM_NEXT_SE) |
+		 LS_64(arm_next, IRDMA_CQ_DBSA_ARM_NEXT);
+
+	set_64bit_val(cq->shadow_area, 32, shadow);
+
+	wmb(); /* make sure the shadow area is updated before the doorbell write */
+
+	writel(cq->cq_id, cq->cqe_alloc_db);
+}
+
+/**
+ * irdma_cq_post_entries - release processed cq entries
+ * @cq: hw cq
+ * @count: # of entries processed
+ *
+ * Advances the cq ring tail by @count and stores the current ring head at
+ * offset 0 of the shadow area. Always returns 0.
+ */
+static enum irdma_status_code irdma_cq_post_entries(struct irdma_cq_uk *cq,
+						    u8 count)
+{
+	u64 head;
+
+	IRDMA_RING_MOVE_TAIL_BY_COUNT(cq->cq_ring, count);
+	head = IRDMA_RING_CURRENT_HEAD(cq->cq_ring);
+	set_64bit_val(cq->shadow_area, 0, head);
+
+	return 0;
+}
+
+/**
+ * irdma_cq_poll_cmpl - get cq completion info
+ * @cq: hw cq
+ * @info: cq poll information returned
+ *
+ * Decodes the CQE at the current CQ head into @info. Returns
+ * IRDMA_ERR_Q_EMPTY when the CQE (or its extended part) does not carry the
+ * expected valid polarity or a flushed completion finds no outstanding work
+ * on the ring, IRDMA_ERR_Q_DESTROYED when the completion context in the CQE
+ * is NULL, and 0 otherwise.
+ */
+static enum irdma_status_code
+irdma_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info)
+{
+ u64 comp_ctx, qword0, qword2, qword3, qword4, qword6, qword7, wqe_qword;
+ __le64 *cqe, *sw_wqe;
+ struct irdma_qp_uk *qp;
+ struct irdma_ring *pring = NULL;
+ u32 wqe_idx, q_type, array_idx = 0;
+ enum irdma_status_code ret_code = 0;
+ bool move_cq_head = true;
+ u8 polarity;
+ bool ext_valid;
+ __le64 *ext_cqe;
+ u32 peek_head;
+
+ if (cq->avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq);
+
+ /* qword3 (offset 24) holds the valid bit; bail out if it is stale */
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)RS_64(qword3, IRDMA_CQ_VALID);
+ if (polarity != cq->polarity)
+ return IRDMA_ERR_Q_EMPTY;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ rmb();
+
+ ext_valid = (bool)RS_64(qword3, IRDMA_CQ_EXTCQE);
+ if (ext_valid) {
+ /*
+ * The extended part is either the second 32 bytes of the same
+ * element (avoid_mem_cflct) or the next entry in the ring.
+ */
+ if (cq->avoid_mem_cflct) {
+ ext_cqe = (__le64 *)((u8 *)cqe + 32);
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)RS_64(qword7, IRDMA_CQ_VALID);
+ } else {
+ peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size;
+ ext_cqe = cq->cq_base[peek_head].buf;
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)RS_64(qword7, IRDMA_CQ_VALID);
+ /* the next entry wrapped to index 0: invert before comparing */
+ if (!peek_head)
+ polarity ^= 1;
+ }
+ if (polarity != cq->polarity)
+ return IRDMA_ERR_Q_EMPTY;
+
+ /* Ensure ext CQE contents are read after ext valid bit is checked */
+ rmb();
+
+ info->imm_valid = (bool)RS_64(qword7, IRDMA_CQ_IMMVALID);
+ if (info->imm_valid) {
+ get_64bit_val(ext_cqe, 0, &qword4);
+ info->imm_data = (u32)RS_64(qword4, IRDMA_CQ_IMMDATALOW32);
+ }
+ info->ud_smac_valid = (bool)RS_64(qword7, IRDMA_CQ_UDSMACVALID);
+ info->ud_vlan_valid = (bool)RS_64(qword7, IRDMA_CQ_UDVLANVALID);
+ if (info->ud_smac_valid || info->ud_vlan_valid) {
+ get_64bit_val(ext_cqe, 16, &qword6);
+ if (info->ud_vlan_valid)
+ info->ud_vlan = (u16)RS_64(qword6, IRDMA_CQ_UDVLAN);
+ /* low 48 bits of qword6, least significant byte into ud_smac[5] */
+ if (info->ud_smac_valid) {
+ info->ud_smac[5] = qword6 & 0xFF;
+ info->ud_smac[4] = (qword6 >> 8) & 0xFF;
+ info->ud_smac[3] = (qword6 >> 16) & 0xFF;
+ info->ud_smac[2] = (qword6 >> 24) & 0xFF;
+ info->ud_smac[1] = (qword6 >> 32) & 0xFF;
+ info->ud_smac[0] = (qword6 >> 40) & 0xFF;
+ }
+ }
+ } else {
+ info->imm_valid = false;
+ info->ud_smac_valid = false;
+ info->ud_vlan_valid = false;
+ }
+
+ q_type = (u8)RS_64(qword3, IRDMA_CQ_SQ);
+ info->error = (bool)RS_64(qword3, IRDMA_CQ_ERROR);
+ info->push_dropped = (bool)RS_64(qword3, IRDMACQ_PSHDROP);
+ info->ipv4 = (bool)RS_64(qword3, IRDMACQ_IPV4);
+ /* map the major error code onto a completion status */
+ if (info->error) {
+ info->major_err = RS_64(qword3, IRDMA_CQ_MAJERR);
+ info->minor_err = RS_64(qword3, IRDMA_CQ_MINERR);
+ if (info->major_err == IRDMA_FLUSH_MAJOR_ERR)
+ info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ else if (info->major_err == IRDMA_LEN_MAJOR_ERR)
+ info->comp_status = IRDMA_COMPL_STATUS_INVALID_LEN;
+ else
+ info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
+ } else {
+ info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
+ }
+
+ get_64bit_val(cqe, 0, &qword0);
+ get_64bit_val(cqe, 16, &qword2);
+
+ info->tcp_seq_num_rtt = (u32)RS_64(qword0, IRDMACQ_TCPSEQNUMRTT);
+ info->qp_id = (u32)RS_64(qword2, IRDMACQ_QPID);
+ info->ud_src_qpn = (u32)RS_64(qword2, IRDMACQ_UDSRCQPN);
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+
+ info->solicited_event = (bool)RS_64(qword3, IRDMACQ_SOEVENT);
+
+ /* the completion context is the qp pointer; irdma_clean_cq() zeroes it */
+ qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
+ if (!qp) {
+ ret_code = IRDMA_ERR_Q_DESTROYED;
+ goto exit;
+ }
+ wqe_idx = (u32)RS_64(qword3, IRDMA_CQ_WQEIDX);
+ info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
+
+ if (q_type == IRDMA_CQE_QTYPE_RQ) {
+ array_idx = wqe_idx / qp->rq_wqe_size_multiplier;
+ /*
+ * Flushed and invalid-length completions take the wr_id from the RQ
+ * ring tail instead of the index reported in the CQE.
+ */
+ if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED ||
+ info->comp_status == IRDMA_COMPL_STATUS_INVALID_LEN) {
+ if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) {
+ ret_code = IRDMA_ERR_Q_EMPTY;
+ goto exit;
+ }
+
+ info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
+ array_idx = qp->rq_ring.tail;
+ } else {
+ info->wr_id = qp->rq_wrid_array[array_idx];
+ }
+
+ if (info->imm_valid)
+ info->op_type = IRDMA_OP_TYPE_REC_IMM;
+ else
+ info->op_type = IRDMA_OP_TYPE_REC;
+ if (qword3 & IRDMACQ_STAG_M) {
+ info->stag_invalid_set = true;
+ info->inv_stag = (u32)RS_64(qword2, IRDMACQ_INVSTAG);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ info->bytes_xfered = (u32)RS_64(qword0, IRDMACQ_PAYLDLEN);
+ IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
+ if (!IRDMA_RING_MORE_WORK(qp->rq_ring) &&
+ info->comp_status == IRDMA_COMPL_STATUS_FLUSHED)
+ qp->rq_flush_complete = true;
+
+ pring = &qp->rq_ring;
+ } else { /* q_type is IRDMA_CQE_QTYPE_SQ */
+ /*
+ * First SQ completion after init with wqe index 0 and an empty SQ
+ * ring: consume this CQE without reporting it and poll again.
+ */
+ if (qp->first_sq_wq) {
+ qp->first_sq_wq = false;
+ if (!wqe_idx && qp->sq_ring.head == qp->sq_ring.tail) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ set_64bit_val(cq->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq->cq_ring));
+ memset(info, 0, sizeof(struct irdma_cq_poll_info));
+ return irdma_cq_poll_cmpl(cq, info);
+ }
+ }
+ /*cease posting push mode on push drop*/
+ if (info->push_dropped)
+ qp->push_mode = false;
+
+ if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) {
+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ if (!info->comp_status)
+ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
+ info->op_type = (u8)RS_64(qword3, IRDMACQ_OP);
+ /* NOTE(review): wqe_qword is read here but not used on this path */
+ sw_wqe = qp->sq_base[wqe_idx].elem;
+ get_64bit_val(sw_wqe, 24, &wqe_qword);
+ IRDMA_RING_SET_TAIL(qp->sq_ring,
+ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
+ } else {
+ if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) {
+ ret_code = IRDMA_ERR_Q_EMPTY;
+ goto exit;
+ }
+
+ /*
+ * Walk the SQ from its tail, skipping NOP WQEs, and report the
+ * first non-NOP WQE as the flushed work request.
+ */
+ do {
+ u8 op_type;
+ u32 tail;
+
+ tail = qp->sq_ring.tail;
+ sw_wqe = qp->sq_base[tail].elem;
+ get_64bit_val(sw_wqe, 24, &wqe_qword);
+ op_type = (u8)RS_64(wqe_qword, IRDMAQPSQ_OPCODE);
+ info->op_type = op_type;
+ IRDMA_RING_SET_TAIL(qp->sq_ring,
+ tail + qp->sq_wrtrk_array[tail].quanta);
+ if (op_type != IRDMAQP_OP_NOP) {
+ info->wr_id = qp->sq_wrtrk_array[tail].wrid;
+ info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
+ break;
+ }
+ } while (1);
+ if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
+ qp->sq_flush_complete = true;
+ }
+ pring = &qp->sq_ring;
+ }
+
+ ret_code = 0;
+
+exit:
+ /*
+ * A successful flushed completion with work still on the ring keeps the
+ * CQE at the head; presumably so the next poll reuses it for the next
+ * outstanding work request.
+ */
+ if (!ret_code &&
+ info->comp_status == IRDMA_COMPL_STATUS_FLUSHED)
+ if (pring && (IRDMA_RING_MORE_WORK(*pring)))
+ move_cq_head = false;
+
+ if (move_cq_head) {
+ /* expected polarity flips each time the head wraps to index 0 */
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) == 0)
+ cq->polarity ^= 1;
+
+ /* an extended CQE stored in the next ring entry consumes it too */
+ if (ext_valid && !cq->avoid_mem_cflct) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) == 0)
+ cq->polarity ^= 1;
+ }
+
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ if (!cq->avoid_mem_cflct && ext_valid)
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ set_64bit_val(cq->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq->cq_ring));
+ } else {
+ /* rewrite the CQE's wqe index with the updated ring tail */
+ qword3 &= ~IRDMA_CQ_WQEIDX_M;
+ qword3 |= LS_64(pring->tail, IRDMA_CQ_WQEIDX);
+ set_64bit_val(cqe, 24, qword3);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_qp_round_up - round a wq depth up to a power of two
+ * @wqdepth: wq depth in quanta to round up
+ *
+ * Smears the highest set bit of (@wqdepth - 1) into every lower bit and
+ * adds one. A value that is already a power of two is returned unchanged.
+ */
+static int irdma_qp_round_up(u32 wqdepth)
+{
+	wqdepth--;
+	wqdepth |= wqdepth >> 1;
+	wqdepth |= wqdepth >> 2;
+	wqdepth |= wqdepth >> 4;
+	wqdepth |= wqdepth >> 8;
+	wqdepth |= wqdepth >> 16;
+
+	return wqdepth + 1;
+}
+
+/**
+ * irdma_get_wqe_shift - get shift count for maximum wqe size
+ * @hw_attrs: hardware attributes; hw_rev selects the sizing table
+ * @sge: maximum number of scatter gather elements per wqe
+ * @inline_data: maximum inline data size
+ * @shift: returns the shift selected for @sge and @inline_data
+ *
+ * For hardware newer than GEN_1:
+ *   sge <= 1 and inline <= 8    -> shift 0
+ *   sge <  4 and inline <= 39   -> shift 1
+ *   sge <  8 and inline <= 101  -> shift 2
+ *   otherwise                   -> shift 3
+ * For GEN_1:
+ *   sge <= 1 and inline <= 16   -> shift 0
+ *   sge <  4 and inline <= 48   -> shift 1
+ *   otherwise                   -> shift 2
+ */
+void irdma_get_wqe_shift(struct irdma_hw_attrs *hw_attrs,
+			 u32 sge,
+			 u32 inline_data,
+			 u8 *shift)
+{
+	if (hw_attrs->hw_rev > IRDMA_GEN_1) {
+		if (sge <= 1 && inline_data <= 8)
+			*shift = 0;
+		else if (sge < 4 && inline_data <= 39)
+			*shift = 1;
+		else if (sge < 8 && inline_data <= 101)
+			*shift = 2;
+		else
+			*shift = 3;
+		return;
+	}
+
+	if (sge <= 1 && inline_data <= 16)
+		*shift = 0;
+	else if (sge < 4 && inline_data <= 48)
+		*shift = 1;
+	else
+		*shift = 2;
+}
+
+/**
+ * irdma_get_sqdepth - get SQ depth (quanta)
+ * @hw_attrs: hardware attributes; supplies max_hw_wq_quanta
+ * @sq_size: SQ size
+ * @shift: shift which determines size of WQE
+ * @sqdepth: returned depth of SQ
+ *
+ * Rounds (@sq_size << @shift) + IRDMA_SQ_RSVD up to a power of two and
+ * raises it to IRDMA_QP_SW_MIN_WQSIZE << @shift if it is smaller. Returns
+ * IRDMA_ERR_INVALID_SIZE when the rounded depth exceeds max_hw_wq_quanta
+ * (@sqdepth then still holds the rounded value), 0 otherwise.
+ */
+enum irdma_status_code irdma_get_sqdepth(struct irdma_hw_attrs *hw_attrs,
+					 u32 sq_size,
+					 u8 shift,
+					 u32 *sqdepth)
+{
+	u32 min_depth = IRDMA_QP_SW_MIN_WQSIZE << shift;
+	u32 depth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD);
+
+	*sqdepth = depth;
+	if (depth < min_depth) {
+		*sqdepth = min_depth;
+		return 0;
+	}
+
+	if (depth > hw_attrs->max_hw_wq_quanta)
+		return IRDMA_ERR_INVALID_SIZE;
+
+	return 0;
+}
+
+/**
+ * irdma_get_rqdepth - get RQ depth (quanta)
+ * @hw_attrs: hardware attributes; supplies max_hw_rq_quanta
+ * @rq_size: RQ size
+ * @shift: shift which determines size of WQE
+ * @rqdepth: returned depth of RQ
+ *
+ * Rounds (@rq_size << @shift) + IRDMA_RQ_RSVD up to a power of two and
+ * raises it to IRDMA_QP_SW_MIN_WQSIZE << @shift if it is smaller. Returns
+ * IRDMA_ERR_INVALID_SIZE when the rounded depth exceeds max_hw_rq_quanta
+ * (@rqdepth then still holds the rounded value), 0 otherwise.
+ */
+enum irdma_status_code irdma_get_rqdepth(struct irdma_hw_attrs *hw_attrs,
+					 u32 rq_size,
+					 u8 shift,
+					 u32 *rqdepth)
+{
+	u32 min_depth = IRDMA_QP_SW_MIN_WQSIZE << shift;
+	u32 depth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD);
+
+	*rqdepth = depth;
+	if (depth < min_depth) {
+		*rqdepth = min_depth;
+		return 0;
+	}
+
+	if (depth > hw_attrs->max_hw_rq_quanta)
+		return IRDMA_ERR_INVALID_SIZE;
+
+	return 0;
+}
+
+/* QP operations; copied by value into each qp by irdma_qp_uk_init() */
+static const struct irdma_qp_uk_ops iw_qp_uk_ops = {
+	.iw_qp_post_wr = irdma_qp_post_wr,
+	.iw_qp_ring_push_db = irdma_qp_ring_push_db,
+	.iw_rdma_write = irdma_rdma_write,
+	.iw_rdma_read = irdma_rdma_read,
+	.iw_send = irdma_send,
+	.iw_inline_rdma_write = irdma_inline_rdma_write,
+	.iw_inline_send = irdma_inline_send,
+	.iw_stag_local_invalidate = irdma_stag_local_invalidate,
+	.iw_mw_bind = irdma_mw_bind,
+	.iw_post_receive = irdma_post_receive,
+	.iw_post_nop = irdma_nop,
+};
+
+/* WQE helpers used when hw_rev is not IRDMA_GEN_1; copied by value into the qp */
+static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = {
+	.iw_set_fragment = irdma_set_fragment,
+	.iw_copy_inline_data = irdma_copy_inline_data,
+	.iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta,
+	.iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe,
+};
+
+/* WQE helpers used when hw_rev is IRDMA_GEN_1; copied by value into the qp */
+static const struct irdma_wqe_uk_ops iw_wqe_uk_ops_gen_1 = {
+	.iw_set_fragment = irdma_set_fragment_gen_1,
+	.iw_copy_inline_data = irdma_copy_inline_data_gen_1,
+	.iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta_gen_1,
+	.iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe_gen_1,
+};
+
+/* CQ operations; copied by value into each cq by irdma_cq_uk_init() */
+static const struct irdma_cq_ops iw_cq_ops = {
+	.iw_cq_request_notification = irdma_cq_request_notification,
+	.iw_cq_poll_cmpl = irdma_cq_poll_cmpl,
+	.iw_cq_post_entries = irdma_cq_post_entries,
+	.iw_cq_clean = irdma_clean_cq,
+};
+
+/* device-level init entry points; copied by value by irdma_device_init_uk() */
+static const struct irdma_device_uk_ops iw_device_uk_ops = {
+	.iw_cq_uk_init = irdma_cq_uk_init,
+	.iw_qp_uk_init = irdma_qp_uk_init,
+};
+
+/**
+ * irdma_setup_connection_wqes - reserve the WQEs used to complete a
+ * connection
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info (not referenced here)
+ * @ret_code: return status of moving ring head
+ *
+ * Advances the SQ ring head and tail, and the initial ring head, by 3
+ * entries when the IRDMA_FEATURE_RTS_AE feature flag is set and by 1
+ * otherwise.
+ */
+static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp,
+					struct irdma_qp_uk_init_info *info,
+					enum irdma_status_code *ret_code)
+{
+	u16 move_cnt = (qp->hw_attrs->feature_flags & IRDMA_FEATURE_RTS_AE) ?
+		       3 : 1;
+
+	IRDMA_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring, move_cnt, *ret_code);
+	IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt);
+	IRDMA_RING_MOVE_HEAD_BY_COUNT(qp->initial_ring, move_cnt, *ret_code);
+}
+
+/**
+ * irdma_qp_uk_init - initialize shared qp
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ *
+ * initializes the vars used in both user and kernel mode.
+ * size of the wqe depends on numbers of max. fragments
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for sq and rq.
+ *
+ * Returns IRDMA_ERR_INVALID_FRAG_COUNT when either requested fragment count
+ * exceeds hw_attrs->max_hw_wq_frags; otherwise returns the status left by
+ * irdma_setup_connection_wqes() (0 when that step is not run).
+ */
+enum irdma_status_code irdma_qp_uk_init(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info)
+{
+ enum irdma_status_code ret_code = 0;
+ u32 sq_ring_size;
+ u8 sqshift, rqshift;
+
+ qp->hw_attrs = info->hw_attrs;
+ if (info->max_sq_frag_cnt > qp->hw_attrs->max_hw_wq_frags)
+ return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+ if (info->max_rq_frag_cnt > qp->hw_attrs->max_hw_wq_frags)
+ return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+ /* hardware other than GEN_1 sizes the SQ WQE for one extra fragment */
+ if (qp->hw_attrs->hw_rev == IRDMA_GEN_1)
+ irdma_get_wqe_shift(qp->hw_attrs, info->max_sq_frag_cnt,
+ info->max_inline_data, &sqshift);
+ else
+ irdma_get_wqe_shift(qp->hw_attrs, info->max_sq_frag_cnt + 1,
+ info->max_inline_data, &sqshift);
+ irdma_get_wqe_shift(qp->hw_attrs, info->max_rq_frag_cnt, 0, &rqshift);
+ /* GEN_1 always uses an RQ WQE shift of 2, overriding the value above */
+ if (qp->hw_attrs->hw_rev == IRDMA_GEN_1)
+ rqshift = 2;
+ qp->qp_caps = info->qp_caps;
+ qp->sq_base = info->sq;
+ qp->rq_base = info->rq;
+ qp->shadow_area = info->shadow_area;
+ qp->sq_wrtrk_array = info->sq_wrtrk_array;
+ qp->rq_wrid_array = info->rq_wrid_array;
+ qp->wqe_alloc_db = info->wqe_alloc_db;
+ qp->qp_id = info->qp_id;
+ qp->sq_size = info->sq_size;
+ qp->push_db = info->push_db;
+ qp->push_wqe = info->push_wqe;
+ qp->push_mode = false;
+ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
+ /* the SQ rings are sized by scaling sq_size with the WQE shift */
+ sq_ring_size = qp->sq_size << sqshift;
+ IRDMA_RING_INIT(qp->sq_ring, sq_ring_size);
+ IRDMA_RING_INIT(qp->initial_ring, sq_ring_size);
+ /* with first_sq_wq set, connection WQEs are reserved and SQ polarity starts at 1 */
+ if (info->first_sq_wq) {
+ irdma_setup_connection_wqes(qp, info, &ret_code);
+ qp->swqe_polarity = 1;
+ qp->first_sq_wq = true;
+ } else {
+ qp->swqe_polarity = 0;
+ }
+ qp->swqe_polarity_deferred = 1;
+ qp->rwqe_polarity = 0;
+ qp->rq_size = info->rq_size;
+ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
+ qp->max_inline_data = info->max_inline_data;
+ qp->rq_wqe_size = rqshift;
+ IRDMA_RING_INIT(qp->rq_ring, qp->rq_size);
+ qp->rq_wqe_size_multiplier = 1 << rqshift;
+ /* the ops tables are copied by value; the WQE helpers depend on hw_rev */
+ qp->qp_ops = iw_qp_uk_ops;
+ if (qp->hw_attrs->hw_rev == IRDMA_GEN_1)
+ qp->wqe_ops = iw_wqe_uk_ops_gen_1;
+ else
+ qp->wqe_ops = iw_wqe_uk_ops;
+
+ return ret_code;
+}
+
+/**
+ * irdma_cq_uk_init - initialize shared cq (user and kernel)
+ * @cq: hw cq
+ * @info: hw cq initialization info
+ *
+ * Copies the cq parameters from @info, initializes the cq ring to cq_size
+ * entries, sets the initial valid polarity to 1 and installs the cq ops.
+ * Always returns 0.
+ */
+enum irdma_status_code irdma_cq_uk_init(struct irdma_cq_uk *cq,
+					struct irdma_cq_uk_init_info *info)
+{
+	/* identity and sizing */
+	cq->cq_id = info->cq_id;
+	cq->cq_size = info->cq_size;
+	cq->avoid_mem_cflct = info->avoid_mem_cflct;
+
+	/* queue memory and doorbells */
+	cq->cq_base = (struct irdma_cqe *)info->cq_base;
+	cq->shadow_area = info->shadow_area;
+	cq->cqe_alloc_db = info->cqe_alloc_db;
+	cq->cq_ack_db = info->cq_ack_db;
+
+	IRDMA_RING_INIT(cq->cq_ring, cq->cq_size);
+	cq->polarity = 1;
+	cq->ops = iw_cq_ops;
+
+	return 0;
+}
+
+/**
+ * irdma_device_init_uk - setup routines for iwarp shared device
+ * @dev: iwarp shared (user and kernel)
+ *
+ * Copies the file-local iw_device_uk_ops table (the cq and qp init entry
+ * points) into @dev by value.
+ */
+void irdma_device_init_uk(struct irdma_dev_uk *dev)
+{
+ dev->ops_uk = iw_device_uk_ops;
+}
+
+/**
+ * irdma_clean_cq - clean cq entries
+ * @q: completion context of the queue being removed
+ * @cq: cq to clean
+ *
+ * Walks the valid CQEs starting at the current cq head and zeroes the
+ * completion context (offset 8) of every entry whose context equals @q.
+ * The walk stops at the first entry whose valid bit does not match the
+ * expected polarity; the expected polarity is inverted each time the index
+ * wraps to 0. The cq head itself is not moved.
+ */
+void irdma_clean_cq(void *q, struct irdma_cq_uk *cq)
+{
+	u32 idx = cq->cq_ring.head;
+	u8 expected = cq->polarity;
+	u64 qword3, ctx;
+	__le64 *cqe;
+
+	for (;;) {
+		if (cq->avoid_mem_cflct)
+			cqe = ((struct irdma_extended_cqe *)(cq->cq_base))[idx].buf;
+		else
+			cqe = cq->cq_base[idx].buf;
+
+		get_64bit_val(cqe, 24, &qword3);
+		if ((u8)RS_64(qword3, IRDMA_CQ_VALID) != expected)
+			break;
+
+		get_64bit_val(cqe, 8, &ctx);
+		if ((void *)(unsigned long)ctx == q)
+			set_64bit_val(cqe, 8, 0);
+
+		idx = (idx + 1) % cq->cq_ring.size;
+		if (!idx)
+			expected ^= 1;
+	}
+}
+
+/**
+ * irdma_nop - post a nop
+ * @qp: hw qp ptr
+ * @wr_id: work request id
+ * @signaled: signaled for completion
+ * @post_sq: ring doorbell
+ *
+ * Takes one minimum-size send wqe, zeroes its first three qwords and writes
+ * a NOP header. Push mode is never used for the nop. Returns 0 on success
+ * or IRDMA_ERR_QP_TOOMANY_WRS_POSTED when no send wqe could be obtained.
+ */
+enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp,
+				 u64 wr_id,
+				 bool signaled,
+				 bool post_sq)
+{
+	/* everything except wr_id, including push_wqe, starts out zero */
+	struct irdma_post_sq_info info = { .wr_id = wr_id };
+	u32 wqe_idx, offset;
+	__le64 *wqe;
+	u64 hdr;
+
+	wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
+					 0, &info);
+	if (!wqe)
+		return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
+
+	for (offset = 0; offset < 24; offset += 8)
+		set_64bit_val(wqe, offset, 0);
+
+	hdr = LS_64(IRDMAQP_OP_NOP, IRDMAQPSQ_OPCODE) |
+	      LS_64(signaled, IRDMAQPSQ_SIGCOMPL) |
+	      LS_64(qp->swqe_polarity, IRDMAQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, hdr);
+	if (post_sq)
+		irdma_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * irdma_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ
+ * @frag_cnt: number of fragments
+ * @quanta: quanta for frag_cnt
+ *
+ * 0 or 1 fragments need the minimum wqe; from 2 fragments on, every pair
+ * (2-3, 4-5, ... 14-15) needs one more quantum, i.e. frag_cnt / 2 + 1.
+ * A count of 15 occurs when immediate data is present. Returns
+ * IRDMA_ERR_INVALID_FRAG_COUNT for more than 15 fragments, 0 otherwise.
+ */
+enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta)
+{
+	if (frag_cnt > 15)
+		return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+	if (frag_cnt < 2)
+		*quanta = IRDMA_QP_WQE_MIN_QUANTA;
+	else
+		*quanta = frag_cnt / 2 + 1;
+
+	return 0;
+}
+
+/**
+ * irdma_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size in bytes given frag_cnt
+ *
+ * 0-1 fragments -> 32 bytes, 2-3 -> 64, 4-7 -> 128, 8-14 -> 256. Returns
+ * IRDMA_ERR_INVALID_FRAG_COUNT for more than 14 fragments, 0 otherwise.
+ */
+enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size)
+{
+	if (frag_cnt > 14)
+		return IRDMA_ERR_INVALID_FRAG_COUNT;
+
+	if (frag_cnt < 2)
+		*wqe_size = 32;
+	else if (frag_cnt < 4)
+		*wqe_size = 64;
+	else if (frag_cnt < 8)
+		*wqe_size = 128;
+	else
+		*wqe_size = 256;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
new file mode 100644
index 0000000..8853c5b
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -0,0 +1,463 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_USER_H
+#define IRDMA_USER_H
+
+#define irdma_handle void *
+#define irdma_adapter_handle irdma_handle
+#define irdma_qp_handle irdma_handle
+#define irdma_cq_handle irdma_handle
+#define irdma_pd_id irdma_handle
+#define irdma_stag_handle irdma_handle
+#define irdma_stag_index u32
+#define irdma_stag u32
+#define irdma_stag_key u8
+#define irdma_tagged_offset u64
+#define irdma_access_privileges u32
+#define irdma_physical_fragment u64
+#define irdma_address_list u64 *
+#define irdma_sgl struct irdma_sge *
+
+#define IRDMA_MAX_MR_SIZE 0x10000000000L
+
+#define IRDMA_ACCESS_FLAGS_LOCALREAD 0x01
+#define IRDMA_ACCESS_FLAGS_LOCALWRITE 0x02
+#define IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
+#define IRDMA_ACCESS_FLAGS_REMOTEREAD 0x05
+#define IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
+#define IRDMA_ACCESS_FLAGS_REMOTEWRITE 0x0a
+#define IRDMA_ACCESS_FLAGS_BIND_WINDOW 0x10
+#define IRDMA_ACCESS_FLAGS_ALL 0x1f
+
+#define IRDMA_OP_TYPE_RDMA_WRITE 0x00
+#define IRDMA_OP_TYPE_RDMA_READ 0x01
+#define IRDMA_OP_TYPE_SEND 0x03
+#define IRDMA_OP_TYPE_SEND_INV 0x04
+#define IRDMA_OP_TYPE_SEND_SOL 0x05
+#define IRDMA_OP_TYPE_SEND_SOL_INV 0x06
+#define IRDMA_OP_TYPE_RDMA_WRITE_SOL 0x0d
+#define IRDMA_OP_TYPE_BIND_MW 0x08
+#define IRDMA_OP_TYPE_FAST_REG_NSMR 0x09
+#define IRDMA_OP_TYPE_INV_STAG 0x0a
+#define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b
+#define IRDMA_OP_TYPE_NOP 0x0c
+#define IRDMA_OP_TYPE_REC 0x3e
+#define IRDMA_OP_TYPE_REC_IMM 0x3f
+
+#define IRDMA_FLUSH_MAJOR_ERR 1
+#define IRDMA_LEN_MAJOR_ERR 2
+
+/*
+ * Device capability constants.
+ *
+ * IRDMA_WQE_SIZE, IRDMA_CQE_SIZE and IRDMA_EXTENDED_CQE_SIZE are used in this
+ * header as lengths of __le64 arrays (struct irdma_qp_quanta, struct irdma_cqe,
+ * struct irdma_extended_cqe), so they count 64-bit words, not bytes.
+ * NOTE(review): the units of the remaining *_SIZE entries are not visible
+ * from this header - confirm against their users.
+ */
+enum irdma_device_caps_const {
+ IRDMA_WQE_SIZE = 4,
+ IRDMA_CQP_WQE_SIZE = 8,
+ IRDMA_CQE_SIZE = 4,
+ IRDMA_EXTENDED_CQE_SIZE = 8,
+ IRDMA_AEQE_SIZE = 2,
+ IRDMA_CEQE_SIZE = 1,
+ IRDMA_CQP_CTX_SIZE = 8,
+ IRDMA_SHADOW_AREA_SIZE = 8,
+ IRDMA_QUERY_FPM_BUF_SIZE = 176,
+ IRDMA_COMMIT_FPM_BUF_SIZE = 176,
+ IRDMA_GATHER_STATS_BUF_SIZE = 1024,
+ IRDMA_MIN_IW_QP_ID = 0,
+ IRDMA_MAX_IW_QP_ID = 262143,
+ IRDMA_MIN_CEQID = 0,
+ IRDMA_MAX_CEQID = 1023,
+ /* CEQ ids run from IRDMA_MIN_CEQID to IRDMA_MAX_CEQID inclusive */
+ IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1,
+ IRDMA_MIN_CQID = 0,
+ IRDMA_MAX_CQID = 524287,
+ IRDMA_MIN_AEQ_ENTRIES = 1,
+ IRDMA_MAX_AEQ_ENTRIES = 524287,
+ IRDMA_MIN_CEQ_ENTRIES = 1,
+ IRDMA_MAX_CEQ_ENTRIES = 524288,
+ IRDMA_MIN_CQ_SIZE = 1,
+ IRDMA_MAX_CQ_SIZE = 1048575,
+ IRDMA_DB_ID_ZERO = 0,
+ IRDMA_MAX_WQ_FRAGMENT_COUNT = 13,
+ IRDMA_MAX_SGE_RD = 13,
+ IRDMA_MAX_OUTBOUND_MSG_SIZE = 2147483647,
+ IRDMA_MAX_INBOUND_MSG_SIZE = 2147483647,
+ IRDMA_MAX_PUSH_PAGE_COUNT = 4096,
+ IRDMA_MAX_PE_ENA_VF_COUNT = 32,
+ IRDMA_MAX_VF_FPM_ID = 47,
+ IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496,
+ IRDMA_MAX_INLINE_DATA_SIZE = 224,
+ IRDMA_MAX_PUSHMODE_INLINE_DATA_SIZE = 224,
+ IRDMA_MAX_IRD_SIZE = 127,
+ IRDMA_MAX_ORD_SIZE = 255,
+ IRDMA_MAX_WQ_ENTRIES = 32768,
+ IRDMA_Q2_BUF_SIZE = 256,
+ IRDMA_QP_CTX_SIZE = 256,
+ IRDMA_MAX_PDS = 262144,
+};
+
+enum irdma_addressing_type {
+ IRDMA_ADDR_TYPE_ZERO_BASED = 0,
+ IRDMA_ADDR_TYPE_VA_BASED = 1,
+};
+
+enum irdma_cmpl_status {
+ IRDMA_COMPL_STATUS_SUCCESS = 0,
+ IRDMA_COMPL_STATUS_FLUSHED,
+ IRDMA_COMPL_STATUS_INVALID_WQE,
+ IRDMA_COMPL_STATUS_QP_CATASTROPHIC,
+ IRDMA_COMPL_STATUS_REMOTE_TERMINATION,
+ IRDMA_COMPL_STATUS_INVALID_STAG,
+ IRDMA_COMPL_STATUS_BASE_BOUND_VIOLATION,
+ IRDMA_COMPL_STATUS_ACCESS_VIOLATION,
+ IRDMA_COMPL_STATUS_INVALID_PD_ID,
+ IRDMA_COMPL_STATUS_WRAP_ERROR,
+ IRDMA_COMPL_STATUS_STAG_INVALID_PDID,
+ IRDMA_COMPL_STATUS_RDMA_READ_ZERO_ORD,
+ IRDMA_COMPL_STATUS_QP_NOT_PRIVLEDGED,
+ IRDMA_COMPL_STATUS_STAG_NOT_INVALID,
+ IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_SIZE,
+ IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY,
+ IRDMA_COMPL_STATUS_INVALID_FBO,
+ IRDMA_COMPL_STATUS_INVALID_LEN,
+ IRDMA_COMPL_STATUS_INVALID_ACCESS,
+ IRDMA_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG,
+ IRDMA_COMPL_STATUS_INVALID_VIRT_ADDRESS,
+ IRDMA_COMPL_STATUS_INVALID_REGION,
+ IRDMA_COMPL_STATUS_INVALID_WINDOW,
+ IRDMA_COMPL_STATUS_INVALID_TOTAL_LEN,
+ IRDMA_COMPL_STATUS_UNKNOWN,
+};
+
+enum irdma_cmpl_notify {
+ IRDMA_CQ_COMPL_EVENT = 0,
+ IRDMA_CQ_COMPL_SOLICITED = 1,
+};
+
+enum irdma_qp_caps {
+ IRDMA_WRITE_WITH_IMM = 1,
+ IRDMA_SEND_WITH_IMM = 2,
+ IRDMA_ROCE = 4,
+};
+
+struct irdma_qp_uk;
+struct irdma_cq_uk;
+struct irdma_qp_uk_init_info;
+struct irdma_cq_uk_init_info;
+
+struct irdma_sge {
+ irdma_tagged_offset tag_off;
+ u32 len;
+ irdma_stag stag;
+};
+
+struct irdma_ring {
+ u32 head;
+ u32 tail;
+ u32 size;
+};
+
+/* Completion queue entry: IRDMA_CQE_SIZE (4) little-endian 64-bit words, 32 bytes */
+struct irdma_cqe {
+ __le64 buf[IRDMA_CQE_SIZE];
+};
+
+/* Extended completion queue entry: IRDMA_EXTENDED_CQE_SIZE (8) little-endian 64-bit words, 64 bytes */
+struct irdma_extended_cqe {
+ __le64 buf[IRDMA_EXTENDED_CQE_SIZE];
+};
+
+struct irdma_post_send {
+ irdma_sgl sg_list;
+ u32 num_sges;
+ u32 qkey;
+ u32 dest_qp;
+ u32 ah_id;
+};
+
+struct irdma_post_inline_send {
+ void *data;
+ u32 len;
+ u32 qkey;
+ u32 dest_qp;
+ u32 ah_id;
+};
+
+struct irdma_rdma_write {
+ irdma_sgl lo_sg_list;
+ u32 num_lo_sges;
+ struct irdma_sge rem_addr;
+};
+
+struct irdma_inline_rdma_write {
+ void *data;
+ u32 len;
+ struct irdma_sge rem_addr;
+};
+
+struct irdma_rdma_read {
+ irdma_sgl lo_sg_list;
+ u32 num_lo_sges;
+ struct irdma_sge rem_addr;
+};
+
+struct irdma_bind_window {
+ irdma_stag mr_stag;
+ u64 bind_len;
+ void *va;
+ enum irdma_addressing_type addressing_type;
+ bool ena_reads;
+ bool ena_writes;
+ irdma_stag mw_stag;
+ bool mem_window_type_1;
+};
+
+struct irdma_inv_local_stag {
+ irdma_stag target_stag;
+};
+
+struct irdma_post_sq_info {
+ u64 wr_id;
+ u8 op_type;
+ u8 l4len;
+ bool signaled;
+ bool read_fence;
+ bool local_fence;
+ bool inline_data;
+ bool imm_data_valid;
+ bool push_wqe;
+ bool report_rtt;
+ bool udp_hdr;
+ u32 imm_data;
+ u32 stag_to_inv;
+ bool defer_flag;
+ union {
+ struct irdma_post_send send;
+ struct irdma_rdma_write rdma_write;
+ struct irdma_rdma_read rdma_read;
+ struct irdma_bind_window bind_window;
+ struct irdma_inv_local_stag inv_local_stag;
+ struct irdma_inline_rdma_write inline_rdma_write;
+ struct irdma_post_inline_send inline_send;
+ } op;
+};
+
+struct irdma_post_rq_info {
+ u64 wr_id;
+ irdma_sgl sg_list;
+ u32 num_sges;
+};
+
+struct irdma_cq_poll_info {
+ u64 wr_id;
+ irdma_qp_handle qp_handle;
+ u32 bytes_xfered;
+ u32 tcp_seq_num_rtt;
+ u32 qp_id;
+ u32 ud_src_qpn;
+ u32 imm_data;
+ irdma_stag inv_stag; /* or L_R_Key */
+ enum irdma_cmpl_status comp_status;
+ u16 major_err;
+ u16 minor_err;
+ u16 ud_vlan;
+ u8 ud_smac[6];
+ u8 op_type;
+ bool stag_invalid_set; /* or L_R_Key set */
+ bool push_dropped;
+ bool error;
+ bool solicited_event;
+ bool ipv4;
+ bool ud_vlan_valid;
+ bool ud_smac_valid;
+ bool imm_valid;
+};
+
+struct irdma_qp_uk_ops {
+ void (*iw_qp_post_wr)(struct irdma_qp_uk *qp);
+ void (*iw_qp_ring_push_db)(struct irdma_qp_uk *qp, u32 wqe_index);
+ enum irdma_status_code (*iw_rdma_write)(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+ enum irdma_status_code (*iw_rdma_read)(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool inv_stag, bool post_sq);
+ enum irdma_status_code (*iw_send)(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+ enum irdma_status_code (*iw_inline_rdma_write)(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+ enum irdma_status_code (*iw_inline_send)(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+ enum irdma_status_code (*iw_stag_local_invalidate)(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+ enum irdma_status_code (*iw_mw_bind)(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+ enum irdma_status_code (*iw_post_receive)(struct irdma_qp_uk *qp,
+ struct irdma_post_rq_info *info);
+ enum irdma_status_code (*iw_post_nop)(struct irdma_qp_uk *qp,
+ u64 wr_id,
+ bool signaled,
+ bool post_sq);
+};
+
+struct irdma_wqe_uk_ops {
+ void (*iw_set_fragment)(__le64 *wqe,
+ u32 offset,
+ struct irdma_sge *sge,
+ u8 valid);
+ void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity);
+ enum irdma_status_code (*iw_inline_data_size_to_quanta)(u32 data_size,
+ u16 *quanta,
+ u32 max_size);
+ void (*iw_set_mw_bind_wqe)(__le64 *wqe, struct irdma_bind_window *op_info);
+};
+
+struct irdma_cq_ops {
+ void (*iw_cq_request_notification)(struct irdma_cq_uk *cq,
+ enum irdma_cmpl_notify cq_notify);
+ enum irdma_status_code (*iw_cq_poll_cmpl)(struct irdma_cq_uk *cq,
+ struct irdma_cq_poll_info *info);
+ enum irdma_status_code (*iw_cq_post_entries)(struct irdma_cq_uk *cq,
+ u8 count);
+ void (*iw_cq_clean)(void *q, struct irdma_cq_uk *cq);
+};
+
+struct irdma_dev_uk;
+
+struct irdma_device_uk_ops {
+ enum irdma_status_code (*iw_cq_uk_init)(struct irdma_cq_uk *cq,
+ struct irdma_cq_uk_init_info *info);
+ enum irdma_status_code (*iw_qp_uk_init)(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info);
+};
+
+struct irdma_dev_uk {
+ struct irdma_device_uk_ops ops_uk;
+};
+
+struct irdma_sq_uk_wr_trk_info {
+ u64 wrid;
+ u32 wr_len;
+ u16 quanta;
+ u8 reserved[2];
+};
+
+/*
+ * One work-queue quantum: IRDMA_WQE_SIZE (4) little-endian 64-bit words,
+ * 32 bytes. struct irdma_qp_uk addresses its SQ and RQ (sq_base, rq_base)
+ * as arrays of this type.
+ */
+struct irdma_qp_quanta {
+ __le64 elem[IRDMA_WQE_SIZE];
+};
+
+struct irdma_qp_uk {
+ struct irdma_qp_quanta *sq_base;
+ struct irdma_qp_quanta *rq_base;
+ struct irdma_hw_attrs *hw_attrs;
+ u32 __iomem *wqe_alloc_db;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ __le64 *shadow_area;
+ u32 *push_db;
+ __le64 *push_wqe;
+ struct irdma_ring sq_ring;
+ struct irdma_ring rq_ring;
+ struct irdma_ring initial_ring;
+ u32 qp_id;
+ u32 qp_caps;
+ u32 sq_size;
+ u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
+ struct irdma_qp_uk_ops qp_ops;
+ struct irdma_wqe_uk_ops wqe_ops;
+ u8 swqe_polarity;
+ u8 swqe_polarity_deferred;
+ u8 rwqe_polarity;
+ u8 rq_wqe_size;
+ u8 rq_wqe_size_multiplier;
+ bool deferred_flag;
+ bool push_mode; /* whether the last post wqe was pushed */
+ bool first_sq_wq;
+ bool force_fence;
+ bool sq_flush_complete; /* Indicates flush was seen and SQ was empty after the flush */
+ bool rq_flush_complete; /* Indicates flush was seen and RQ was empty after the flush */
+ u8 dbg_rq_flushed;
+};
+
+struct irdma_cq_uk {
+ struct irdma_cqe *cq_base;
+ u32 __iomem *cqe_alloc_db;
+ u32 __iomem *cq_ack_db;
+ __le64 *shadow_area;
+ u32 cq_id;
+ u32 cq_size;
+ struct irdma_ring cq_ring;
+ u8 polarity;
+ bool avoid_mem_cflct;
+ struct irdma_cq_ops ops;
+};
+
+struct irdma_qp_uk_init_info {
+ struct irdma_qp_quanta *sq;
+ struct irdma_qp_quanta *rq;
+ struct irdma_hw_attrs *hw_attrs;
+ u32 __iomem *wqe_alloc_db;
+ __le64 *shadow_area;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u32 *push_db;
+ __le64 *push_wqe;
+ u32 qp_id;
+ u32 qp_caps;
+ u32 sq_size;
+ u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
+ u8 first_sq_wq;
+};
+
+struct irdma_cq_uk_init_info {
+ u32 __iomem *cqe_alloc_db;
+ u32 __iomem *cq_ack_db;
+ struct irdma_cqe *cq_base;
+ __le64 *shadow_area;
+ u32 cq_size;
+ u32 cq_id;
+ bool avoid_mem_cflct;
+};
+
+void irdma_device_init_uk(struct irdma_dev_uk *dev);
+void irdma_qp_post_wr(struct irdma_qp_uk *qp);
+__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp,
+ u32 *wqe_idx,
+ u16 quanta,
+ u32 total_size,
+ struct irdma_post_sq_info *info);
+__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx);
+enum irdma_status_code irdma_cq_uk_init(struct irdma_cq_uk *cq,
+ struct irdma_cq_uk_init_info *info);
+enum irdma_status_code irdma_qp_uk_init(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info);
+void irdma_clean_cq(void *q, struct irdma_cq_uk *cq);
+enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp,
+ u64 wr_id,
+ bool signaled,
+ bool post_sq);
+enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta);
+enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size);
+void irdma_get_wqe_shift(struct irdma_hw_attrs *hw_attrs,
+ u32 sge,
+ u32 inline_data,
+ u8 *shift);
+enum irdma_status_code irdma_get_sqdepth(struct irdma_hw_attrs *hw_attrs,
+ u32 sq_size,
+ u8 shift,
+ u32 *wqdepth);
+enum irdma_status_code irdma_get_rqdepth(struct irdma_hw_attrs *hw_attrs,
+ u32 rq_size,
+ u8 shift,
+ u32 *wqdepth);
+void irdma_qp_push_wqe(struct irdma_qp_uk *qp,
+ __le64 *wqe,
+ u16 quanta,
+ u32 wqe_idx,
+ bool post_sq);
+#endif /* IRDMA_USER_H */
--
1.8.3.1
^ permalink raw reply related
* [RFC v1 11/19] RDMA/irdma: Add PBLE resource manager
From: Shiraz Saleem @ 2019-02-15 17:10 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: Mustafa Ismail <mustafa.ismail@intel.com>
Implement a Physical Buffer List Entry (PBLE) resource manager
to manage a pool of PBLE HMC resource objects.
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/hw/irdma/pble.c | 520 +++++++++++++++++++++++++++++++++++++
drivers/infiniband/hw/irdma/pble.h | 135 ++++++++++
2 files changed, 655 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/pble.c
create mode 100644 drivers/infiniband/hw/irdma/pble.h
diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
new file mode 100644
index 0000000..66fab69
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/pble.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "osdep.h"
+#include "status.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "pble.h"
+
+static enum irdma_status_code add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
+
+/**
+ * irdma_destroy_pble_prm - release every chunk tracked by the pble prm
+ * @pble_rsrc: pble resources
+ *
+ * Drains pinfo.clist: each chunk is unlinked, its paged backing memory is
+ * released when the chunk type is PBLE_SD_PAGED, and then its bitmap (if one
+ * was allocated) and the chunk's own allocation are freed. Besides module
+ * unload this is also used by irdma_hmc_init_pble() to unwind a failed
+ * initialization.
+ */
+void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+	struct list_head *chunk_list = &pble_rsrc->pinfo.clist;
+	struct irdma_sc_dev *dev = pble_rsrc->dev;
+	struct irdma_chunk *chunk;
+
+	while (!list_empty(chunk_list)) {
+		chunk = list_first_entry(chunk_list, struct irdma_chunk, list);
+		list_del(&chunk->list);
+
+		if (chunk->type == PBLE_SD_PAGED)
+			irdma_pble_free_paged_mem(chunk);
+
+		if (chunk->bitmapbuf)
+			irdma_free_virt_mem(dev->hw, &chunk->bitmapmem);
+
+		/* chunkmem describes the allocation that holds the chunk itself */
+		irdma_free_virt_mem(dev->hw, &chunk->chunkmem);
+	}
+}
+
+/**
+ * irdma_hmc_init_pble - Initialize pble resources during module load
+ * @dev: irdma_sc_dev struct
+ * @pble_rsrc: pble resources
+ *
+ * Records the base of the pble HMC object, skips the entries needed to bring
+ * the first usable pble onto a 4K boundary, initializes the prm lock and
+ * chunk list, and adds the first chunk through add_pble_prm().
+ *
+ * Return: 0 on success. If the first chunk cannot be added, whatever was set
+ * up is torn down again and IRDMA_ERR_NO_MEMORY is returned.
+ */
+enum irdma_status_code
+irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+		    struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+	struct irdma_hmc_info *hmc_info = dev->hmc_info;
+	enum irdma_status_code status = 0;
+	u64 misalign;
+	u32 fpm_idx = 0;
+
+	pble_rsrc->dev = dev;
+	pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].base;
+
+	/*
+	 * Start pbles on a 4K boundary: skip the 8-byte entries that lie
+	 * between the base and the next 4K-aligned address. The granule is
+	 * the fixed 4K implied by the 0xfff mask, not the CPU's PAGE_SIZE;
+	 * using PAGE_SIZE would skip far too many entries on kernels built
+	 * with larger pages.
+	 */
+	misalign = pble_rsrc->fpm_base_addr & 0xfff;
+	if (misalign)
+		fpm_idx = (u32)((0x1000 - misalign) >> 3);
+
+	pble_rsrc->unallocated_pble =
+		hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt - fpm_idx;
+	pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
+	pble_rsrc->pinfo.pble_shift = PBLE_SHIFT;
+
+	spin_lock_init(&pble_rsrc->pinfo.prm_lock);
+	INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
+	if (add_pble_prm(pble_rsrc)) {
+		irdma_destroy_pble_prm(pble_rsrc);
+		status = IRDMA_ERR_NO_MEMORY;
+	}
+
+	return status;
+}
+
+/**
+ * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address
+ * @pble_rsrc: structure containing fpm address
+ * @idx: where to return indexes
+ *
+ * sd_idx and pd_idx are next_fpm_addr divided by the direct and paged
+ * backing-page sizes; rel_pd_idx is pd_idx taken modulo the number of pds
+ * in one sd.
+ */
+static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc,
+			  struct sd_pd_idx *idx)
+{
+	u64 fpm_addr = pble_rsrc->next_fpm_addr;
+
+	/*
+	 * Divide in 64 bits and narrow the quotient. Narrowing the address
+	 * first would drop its upper half for fpm addresses at or above 4 GiB.
+	 * NOTE(review): this relies on both divisors being power-of-two
+	 * constants so that 32-bit builds need no div_u64() - confirm.
+	 */
+	idx->sd_idx = (u32)(fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE);
+	idx->pd_idx = (u32)(fpm_addr / IRDMA_HMC_PAGED_BP_SIZE);
+	idx->rel_pd_idx = idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD;
+}
+
+/**
+ * add_sd_direct - back a pble chunk with a direct sd
+ * @pble_rsrc: pble resource ptr
+ * @info: page info for sd
+ *
+ * Adds a direct sd table entry when the sd is not valid yet (marking the
+ * chunk PBLE_SD_CONTIGOUS), then points the chunk at its slice of the sd
+ * backing page: size, virtual address and fpm address.
+ *
+ * Return: 0 on success, or the error from irdma_add_sd_table_entry().
+ */
+static enum irdma_status_code
+add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc,
+	      struct irdma_add_page_info *info)
+{
+	struct irdma_hmc_sd_entry *sd_entry = info->sd_entry;
+	struct irdma_chunk *chunk = info->chunk;
+	struct irdma_sc_dev *dev = pble_rsrc->dev;
+	enum irdma_status_code ret_code;
+	u32 offset;
+
+	if (!sd_entry->valid) {
+		ret_code = irdma_add_sd_table_entry(dev->hw, info->hmc_info,
+						    info->idx.sd_idx,
+						    IRDMA_SD_TYPE_DIRECT,
+						    IRDMA_HMC_DIRECT_BP_SIZE);
+		if (ret_code)
+			return ret_code;
+
+		chunk->type = PBLE_SD_CONTIGOUS;
+	}
+
+	/* byte offset of the first page of this chunk inside the sd */
+	offset = info->idx.rel_pd_idx << HMC_PAGED_BP_SHIFT;
+
+	chunk->size = info->pages << HMC_PAGED_BP_SHIFT;
+	chunk->vaddr = (u64)((u8 *)sd_entry->u.bp.addr.va + offset);
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	irdma_debug(dev, IRDMA_DEBUG_PBLE,
+		    "chunk_size[%lld] = 0x%llx vaddr=0x%llx fpm_addr = %llx\n",
+		    chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
+
+	return 0;
+}
+
+/**
+ * fpm_to_idx - given fpm address, get pble index
+ * @pble_rsrc: pble resource management
+ * @addr: fpm address for index
+ *
+ * Return: the distance of @addr from the pble base address, counted in
+ * 8-byte entries and narrowed to 32 bits.
+ */
+static u32 fpm_to_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, u64 addr)
+{
+	u64 byte_off = addr - pble_rsrc->fpm_base_addr;
+
+	return (u32)(byte_off >> 3);
+}
+
+/**
+ * add_bp_pages - add backing pages for sd
+ * @pble_rsrc: pble resource management
+ * @info: page info for sd
+ *
+ * Obtains paged memory for info->pages pages, adds a paged sd table entry
+ * and then a pd table entry for every page whose pd entry is not valid yet.
+ * On success the chunk's fpm address is set to the current next_fpm_addr.
+ *
+ * Return: 0 on success, IRDMA_ERR_NO_MEMORY if the paged memory cannot be
+ * obtained, otherwise the error of the failing table update. On those
+ * failures the paged memory is released again.
+ */
+static enum irdma_status_code
+add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_add_page_info *info)
+{
+ struct irdma_sc_dev *dev = pble_rsrc->dev;
+ u8 *addr;
+ struct irdma_dma_mem mem;
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_hmc_sd_entry *sd_entry = info->sd_entry;
+ struct irdma_hmc_info *hmc_info = info->hmc_info;
+ struct irdma_chunk *chunk = info->chunk;
+ enum irdma_status_code status = 0;
+ u32 rel_pd_idx = info->idx.rel_pd_idx;
+ u32 pd_idx = info->idx.pd_idx;
+ u32 i;
+
+ if (irdma_pble_get_paged_mem(chunk, info->pages))
+ return IRDMA_ERR_NO_MEMORY;
+
+ status = irdma_add_sd_table_entry(dev->hw, hmc_info,
+ info->idx.sd_idx, IRDMA_SD_TYPE_PAGED,
+ IRDMA_HMC_DIRECT_BP_SIZE);
+
+ if (status)
+ goto error;
+
+ addr = (u8 *)chunk->vaddr;
+ for (i = 0; i < info->pages; i++) {
+ /* describe backing page i for irdma_add_pd_table_entry() */
+ mem.pa = (u64)chunk->dmainfo.dmaaddrs[i];
+ /*
+ * NOTE(review): the pble code otherwise sizes backing pages with
+ * HMC_PAGED_BP_SHIFT (4K); PAGE_SIZE differs from that on kernels
+ * with larger pages - confirm against irdma_pble_get_paged_mem().
+ */
+ mem.size = PAGE_SIZE;
+ mem.va = (void *)(addr);
+ pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
+ /*
+ * NOTE(review): pd_idx and addr advance only when the entry was
+ * not valid yet, while rel_pd_idx and the dmaaddrs index advance
+ * on every iteration - confirm the two cannot drift apart.
+ */
+ if (!pd_entry->valid) {
+ status = irdma_add_pd_table_entry(dev, hmc_info,
+ pd_idx++, &mem);
+ if (status)
+ goto error;
+
+ addr += PAGE_SIZE;
+ }
+ }
+
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ return 0;
+
+error:
+ /* only the paged memory is released here; table entries added above stay */
+ irdma_pble_free_paged_mem(chunk);
+
+ return status;
+}
+
+/**
+ * add_pble_prm - add a sd entry for pble resource
+ * @pble_rsrc: pble resource management
+ *
+ * Allocates a chunk descriptor, backs it with either a direct sd or paged
+ * backing pages, registers it with the prm and, when the sd was not valid
+ * yet, programs the sd on a PF. The chunk is linked into pinfo.clist only
+ * after every step that can fail has succeeded, so the error path never
+ * frees a chunk that is still on the list.
+ *
+ * Return: 0 on success. IRDMA_ERR_NO_MEMORY when fewer than PBLE_PER_PAGE
+ * pbles are left or the descriptor cannot be allocated,
+ * IRDMA_ERR_INVALID_PAGE_DESC_INDEX when next_fpm_addr is not 4K aligned,
+ * otherwise the error of the failing step.
+ */
+static enum irdma_status_code add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+	struct irdma_sc_dev *dev = pble_rsrc->dev;
+	struct irdma_hmc_sd_entry *sd_entry;
+	struct irdma_hmc_info *hmc_info;
+	struct irdma_chunk *chunk;
+	struct irdma_add_page_info info;
+	struct sd_pd_idx *idx = &info.idx;
+	enum irdma_status_code ret_code = 0;
+	enum irdma_sd_entry_type sd_entry_type;
+	u64 sd_reg_val = 0;
+	struct irdma_virt_mem chunkmem;
+	u32 pages;
+
+	if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
+		return IRDMA_ERR_NO_MEMORY;
+
+	if (pble_rsrc->next_fpm_addr & 0xfff)
+		return IRDMA_ERR_INVALID_PAGE_DESC_INDEX;
+
+	ret_code = irdma_allocate_virt_mem(dev->hw, &chunkmem, sizeof(*chunk));
+	if (ret_code)
+		return IRDMA_ERR_NO_MEMORY;
+
+	/* the descriptor remembers its own allocation so it can be freed */
+	chunk = (struct irdma_chunk *)chunkmem.va;
+	chunk->chunkmem = chunkmem;
+	hmc_info = dev->hmc_info;
+	chunk->dev = dev;
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	get_sd_pd_idx(pble_rsrc, idx);
+	sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
+
+	/* pages up to the end of this sd, capped by the pbles still left */
+	pages = (idx->rel_pd_idx) ? (IRDMA_HMC_PD_CNT_IN_SD -
+				     idx->rel_pd_idx) : IRDMA_HMC_PD_CNT_IN_SD;
+	pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
+	info.chunk = chunk;
+	info.hmc_info = hmc_info;
+	info.pages = pages;
+	info.sd_entry = sd_entry;
+
+	/* a fresh sd is direct only when a PF can use the whole of it */
+	if (!sd_entry->valid)
+		sd_entry_type = (!idx->rel_pd_idx &&
+				 (pages == IRDMA_HMC_PD_CNT_IN_SD) &&
+				 dev->is_pf) ?
+				IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
+	else
+		sd_entry_type = sd_entry->entry_type;
+
+	irdma_debug(dev, IRDMA_DEBUG_PBLE,
+		    "pages = %d, unallocated_pble[%d] current_fpm_addr = %llx\n",
+		    pages, pble_rsrc->unallocated_pble, pble_rsrc->next_fpm_addr);
+	irdma_debug(dev, IRDMA_DEBUG_PBLE, "sd_entry_type = %d\n",
+		    sd_entry_type);
+
+	if (sd_entry_type == IRDMA_SD_TYPE_DIRECT) {
+		ret_code = add_sd_direct(pble_rsrc, &info);
+		if (ret_code)
+			/* direct setup failed: fall back to paged */
+			sd_entry_type = IRDMA_SD_TYPE_PAGED;
+		else
+			/* counted only when a direct sd was really used */
+			pble_rsrc->stats_direct_sds++;
+	}
+
+	if (sd_entry_type == IRDMA_SD_TYPE_PAGED) {
+		ret_code = add_bp_pages(pble_rsrc, &info);
+		if (ret_code)
+			goto error;
+
+		pble_rsrc->stats_paged_sds++;
+	}
+
+	ret_code = irdma_prm_add_pble_mem(&pble_rsrc->pinfo, chunk);
+	if (ret_code)
+		goto error;
+
+	pble_rsrc->next_fpm_addr += chunk->size;
+	irdma_debug(dev, IRDMA_DEBUG_PBLE,
+		    "next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n",
+		    pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
+	pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3);
+	sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ?
+		     sd_entry->u.pd_table.pd_page_addr.pa :
+		     sd_entry->u.bp.addr.pa;
+
+	if (!sd_entry->valid) {
+		if (dev->is_pf) {
+			ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id,
+						    sd_reg_val, idx->sd_idx,
+						    sd_entry->entry_type, true);
+			if (ret_code)
+				goto error;
+		}
+
+		sd_entry->valid = true;
+	}
+
+	/* nothing can fail from here on: publish the chunk */
+	list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+	return 0;
+
+error:
+	/*
+	 * NOTE(review): next_fpm_addr and unallocated_pble are not rolled
+	 * back when irdma_hmc_sd_one() fails - confirm that is intended.
+	 */
+	if (chunk->bitmapbuf)
+		irdma_free_virt_mem(dev->hw, &chunk->bitmapmem);
+
+	irdma_free_virt_mem(dev->hw, &chunk->chunkmem);
+
+	return ret_code;
+}
+
+/**
+ * free_lvl2 - free level 2 pble
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ *
+ * Returns the leaf pbles to the prm, stopping at the first leaf whose addr
+ * is zero, then returns the root if it has an addr, and finally frees the
+ * leaf array and clears the leaf pointer.
+ */
+static void free_lvl2(struct irdma_hmc_pble_rsrc *pble_rsrc,
+		      struct irdma_pble_alloc *palloc)
+{
+	struct irdma_pble_level2 *lvl2 = &palloc->level2;
+	struct irdma_pble_prm *pinfo = &pble_rsrc->pinfo;
+	struct irdma_pble_info *leaf = lvl2->leaf;
+	u32 n;
+
+	for (n = 0; n < lvl2->leaf_cnt && leaf->addr; n++, leaf++)
+		irdma_prm_return_pbles(pinfo, &leaf->chunkinfo);
+
+	if (lvl2->root.addr)
+		irdma_prm_return_pbles(pinfo, &lvl2->root.chunkinfo);
+
+	irdma_free_virt_mem(pble_rsrc->dev->hw, &lvl2->leafmem);
+	lvl2->leaf = NULL;
+}
+
+/**
+ * get_lvl2_pble - get level 2 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ *
+ * Splits total_cnt pbles into leaves of PBLE_PER_PAGE entries (the last one
+ * may be shorter), allocates the leaf bookkeeping array, one root block with
+ * an 8-byte slot per leaf, and then each leaf. Every root slot is filled
+ * with the pble index of its leaf.
+ *
+ * Return: 0 on success, the allocator's error if the leaf array cannot be
+ * allocated, or IRDMA_ERR_NO_MEMORY if the root or any leaf cannot be
+ * obtained from the prm (everything taken so far is given back).
+ */
+static enum irdma_status_code
+get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+	      struct irdma_pble_alloc *palloc)
+{
+	struct irdma_pble_level2 *lvl2 = &palloc->level2;
+	struct irdma_pble_info *root = &lvl2->root;
+	struct irdma_sc_dev *dev = pble_rsrc->dev;
+	struct irdma_pble_info *leaf;
+	enum irdma_status_code ret_code;
+	u32 full_leaves, tail_cnt, leaf_total, cur_cnt, i;
+	u64 *root_slot;
+	u64 fpm_addr;
+
+	/* number of full 512-entry (4K) leaves, plus one for any remainder */
+	full_leaves = palloc->total_cnt >> PBLE_512_SHIFT;
+	tail_cnt = palloc->total_cnt % PBLE_PER_PAGE;
+	leaf_total = full_leaves;
+	if (tail_cnt)
+		leaf_total++;
+	lvl2->leaf_cnt = leaf_total;
+
+	ret_code = irdma_allocate_virt_mem(dev->hw, &lvl2->leafmem,
+					   (sizeof(*leaf) * leaf_total));
+	if (ret_code)
+		return ret_code;
+
+	lvl2->leaf = (struct irdma_pble_info *)lvl2->leafmem.va;
+
+	/* the root needs one 8-byte entry per leaf */
+	ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &root->chunkinfo,
+				       leaf_total << 3, &root->addr,
+				       &fpm_addr);
+	if (ret_code) {
+		irdma_free_virt_mem(dev->hw, &lvl2->leafmem);
+		lvl2->leaf = NULL;
+		return IRDMA_ERR_NO_MEMORY;
+	}
+
+	root->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+	root->cnt = leaf_total;
+	root_slot = (u64 *)root->addr;
+	leaf = lvl2->leaf;
+	for (i = 0; i < leaf_total; i++) {
+		/* only the last leaf may be partially filled */
+		cur_cnt = PBLE_PER_PAGE;
+		if (tail_cnt && i + 1 == leaf_total)
+			cur_cnt = tail_cnt;
+
+		ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo,
+					       &leaf[i].chunkinfo,
+					       cur_cnt << 3,
+					       &leaf[i].addr,
+					       &fpm_addr);
+		if (ret_code)
+			goto error;
+
+		leaf[i].idx = fpm_to_idx(pble_rsrc, fpm_addr);
+		leaf[i].cnt = cur_cnt;
+		root_slot[i] = (u64)leaf[i].idx;
+	}
+
+	palloc->level = PBLE_LEVEL_2;
+	pble_rsrc->stats_lvl2++;
+	return 0;
+
+error:
+	free_lvl2(pble_rsrc, palloc);
+
+	return IRDMA_ERR_NO_MEMORY;
+}
+
+/**
+ * get_lvl1_pble - get level 1 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 1 pble allocation
+ *
+ * Requests total_cnt 8-byte entries from the prm in one piece and records
+ * the resulting address, pble index and count in palloc->level1.
+ *
+ * Return: 0 on success, IRDMA_ERR_NO_MEMORY if the prm cannot satisfy the
+ * request (palloc is then left unchanged apart from level1.chunkinfo, which
+ * is handed to the prm).
+ */
+static enum irdma_status_code
+get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+	      struct irdma_pble_alloc *palloc)
+{
+	struct irdma_pble_info *lvl1 = &palloc->level1;
+	u64 vaddr, fpm_addr;
+
+	if (irdma_prm_get_pbles(&pble_rsrc->pinfo, &lvl1->chunkinfo,
+				palloc->total_cnt << 3, &vaddr, &fpm_addr))
+		return IRDMA_ERR_NO_MEMORY;
+
+	palloc->level = PBLE_LEVEL_1;
+	lvl1->addr = vaddr;
+	lvl1->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+	lvl1->cnt = palloc->total_cnt;
+	pble_rsrc->stats_lvl1++;
+
+	return 0;
+}
+
+/**
+ * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine
+ * @pble_rsrc: pble resources
+ * @palloc: contains all information regarding pble (idx + pble addr)
+ * @level1_only: true if a level 2 allocation must not be attempted
+ *
+ * A level 1 allocation is always tried first. Level 2 is tried only when
+ * level 1 failed, the caller allows it and more than PBLE_PER_PAGE pbles
+ * were requested.
+ *
+ * Return: 0 on success, otherwise the error of the last attempt made.
+ */
+static enum irdma_status_code
+get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+		   struct irdma_pble_alloc *palloc,
+		   bool level1_only)
+{
+	enum irdma_status_code status = get_lvl1_pble(pble_rsrc, palloc);
+
+	if (!status)
+		return status;
+
+	if (level1_only || palloc->total_cnt <= PBLE_PER_PAGE)
+		return status;
+
+	return get_lvl2_pble(pble_rsrc, palloc);
+}
+
+/**
+ * irdma_get_pble - allocate pbles from the prm
+ * @pble_rsrc: pble resources
+ * @palloc: contains all information regarding pble (idx + pble addr)
+ * @pble_cnt: #of pbles requested
+ * @level1_only: true if only pble level 1 to acquire
+ *
+ * Runs under pble_lock. The request is first tried against what the prm
+ * already holds; if that fails, chunks are added one at a time through
+ * add_pble_prm() and the request is retried after each, for at most
+ * (pble_cnt >> 18) + 1 rounds, or a single round when @level1_only is set.
+ * The ok/fail statistics are updated before the lock is dropped.
+ *
+ * Return: 0 on success, otherwise the error of the last step attempted.
+ */
+enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+				      struct irdma_pble_alloc *palloc,
+				      u32 pble_cnt,
+				      bool level1_only)
+{
+	enum irdma_status_code status;
+	unsigned long flags;
+	int sds_left;
+
+	palloc->total_cnt = pble_cnt;
+	palloc->level = PBLE_LEVEL_0;
+	spin_lock_irqsave(&pble_rsrc->pble_lock, flags);
+
+	/* check first whether the pbles can be had without adding sds */
+	status = get_lvl1_lvl2_pble(pble_rsrc, palloc, level1_only);
+	if (status) {
+		for (sds_left = (palloc->total_cnt >> 18) + 1; sds_left > 0;
+		     sds_left--) {
+			status = add_pble_prm(pble_rsrc);
+			if (status)
+				break;
+
+			status = get_lvl1_lvl2_pble(pble_rsrc, palloc,
+						    level1_only);
+			/* if level1_only, only go through it once */
+			if (!status || level1_only)
+				break;
+		}
+	}
+
+	if (status) {
+		pble_rsrc->stats_alloc_fail++;
+	} else {
+		pble_rsrc->allocdpbles += pble_cnt;
+		pble_rsrc->stats_alloc_ok++;
+	}
+	spin_unlock_irqrestore(&pble_rsrc->pble_lock, flags);
+
+	return status;
+}
+
+/**
+ * irdma_free_pble - put pbles back into prm
+ * @pble_rsrc: pble resources
+ * @palloc: contains all information regarding pble resource being freed
+ *
+ * A level 2 allocation is released through free_lvl2(); any other level
+ * returns palloc->level1.chunkinfo to the prm. The freed-pble and
+ * freed-allocation counters are updated.
+ */
+void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+		     struct irdma_pble_alloc *palloc)
+{
+	pble_rsrc->freedpbles += palloc->total_cnt;
+
+	if (palloc->level != PBLE_LEVEL_2)
+		irdma_prm_return_pbles(&pble_rsrc->pinfo,
+				       &palloc->level1.chunkinfo);
+	else
+		free_lvl2(pble_rsrc, palloc);
+
+	pble_rsrc->stats_alloc_freed++;
+}
diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h
new file mode 100644
index 0000000..f9ecc87
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/pble.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_PBLE_H
+#define IRDMA_PBLE_H
+
+#define PBLE_SHIFT 6
+#define PBLE_PER_PAGE 512
+#define HMC_PAGED_BP_SHIFT 12
+#define PBLE_512_SHIFT 9
+#define PBLE_INVALID_IDX 0xffffffff
+
+enum irdma_pble_level {
+ PBLE_LEVEL_0 = 0,
+ PBLE_LEVEL_1 = 1,
+ PBLE_LEVEL_2 = 2,
+};
+
+enum irdma_alloc_type {
+ PBLE_NO_ALLOC = 0,
+ PBLE_SD_CONTIGOUS = 1,
+ PBLE_SD_PAGED = 2,
+};
+
+struct irdma_chunk;
+
+struct irdma_pble_chunkinfo {
+ struct irdma_chunk *pchunk;
+ u64 bit_idx;
+ u64 bits_used;
+};
+
+struct irdma_pble_info {
+ u64 addr;
+ u32 idx;
+ u32 cnt;
+ struct irdma_pble_chunkinfo chunkinfo;
+};
+
+struct irdma_pble_level2 {
+ struct irdma_pble_info root;
+ struct irdma_pble_info *leaf;
+ struct irdma_virt_mem leafmem;
+ u32 leaf_cnt;
+};
+
+/*
+ * One pble allocation as filled in by irdma_get_pble(): total_cnt is the
+ * number of pbles requested and level starts out as PBLE_LEVEL_0.
+ * level tells which member of the anonymous union is live:
+ * irdma_free_pble() releases level2 when level is PBLE_LEVEL_2 and returns
+ * level1.chunkinfo to the prm for any other level.
+ */
+struct irdma_pble_alloc {
+ u32 total_cnt;
+ enum irdma_pble_level level;
+ union {
+ struct irdma_pble_info level1;
+ struct irdma_pble_level2 level2;
+ };
+};
+
+struct sd_pd_idx {
+ u32 sd_idx;
+ u32 pd_idx;
+ u32 rel_pd_idx;
+};
+
+struct irdma_add_page_info {
+ struct irdma_chunk *chunk;
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_hmc_info *hmc_info;
+ struct sd_pd_idx idx;
+ u32 pages;
+};
+
+struct irdma_chunk {
+ struct list_head list;
+ struct irdma_dma_info dmainfo;
+ void *bitmapbuf;
+
+ u32 sizeofbitmap;
+ u64 size;
+ u64 vaddr;
+ u64 fpm_addr;
+ u32 pg_cnt;
+ enum irdma_alloc_type type;
+ struct irdma_sc_dev *dev;
+ struct irdma_virt_mem bitmapmem;
+ struct irdma_virt_mem chunkmem;
+};
+
+/*
+ * State of the pble prm embedded in struct irdma_hmc_pble_rsrc.
+ * clist holds the struct irdma_chunk descriptors added by add_pble_prm();
+ * irdma_hmc_init_pble() initializes clist and prm_lock and sets pble_shift
+ * to PBLE_SHIFT.
+ * NOTE(review): total_pble_alloc and free_pble_cnt are maintained outside
+ * pble.c - confirm their meaning against irdma_prm_add_pble_mem() and
+ * irdma_prm_get_pbles().
+ */
+struct irdma_pble_prm {
+ struct list_head clist;
+ spinlock_t prm_lock; /* protect prm bitmap */
+ u64 total_pble_alloc;
+ u64 free_pble_cnt;
+ u8 pble_shift;
+};
+
+/*
+ * Per-device pble resource manager state, set up by irdma_hmc_init_pble().
+ *
+ * unallocated_pble: pbles not yet handed to the prm; starts as the pble HMC
+ *	object count minus the entries skipped for 4K alignment and shrinks by
+ *	chunk->size >> 3 for every chunk added in add_pble_prm().
+ * fpm_base_addr: base of the pble HMC object; fpm_to_idx() measures pble
+ *	indexes from it.
+ * next_fpm_addr: fpm address at which the next chunk will start; advanced
+ *	by chunk->size in add_pble_prm().
+ * allocdpbles / freedpbles: running totals of pbles granted by
+ *	irdma_get_pble() and released by irdma_free_pble().
+ * stats_*: event counters bumped in pble.c (direct/paged sds added,
+ *	successful/failed/freed allocations, level 1 / level 2 allocations).
+ */
+struct irdma_hmc_pble_rsrc {
+ u32 unallocated_pble;
+ spinlock_t pble_lock; /* to serialize PBLE resource acquisition */
+ struct irdma_sc_dev *dev;
+ u64 fpm_base_addr;
+ u64 next_fpm_addr;
+ struct irdma_pble_prm pinfo;
+ u64 allocdpbles;
+ u64 freedpbles;
+ u32 stats_direct_sds;
+ u32 stats_paged_sds;
+ u64 stats_alloc_ok;
+ u64 stats_alloc_fail;
+ u64 stats_alloc_freed;
+ u64 stats_lvl1;
+ u64 stats_lvl2;
+};
+
+void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
+enum irdma_status_code irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+ struct irdma_hmc_pble_rsrc *pble_rsrc);
+void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc);
+enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc,
+ u32 pble_cnt,
+ bool level1_only);
+enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+ struct irdma_chunk *pchunk);
+enum irdma_status_code irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo,
+ u32 mem_size,
+ u64 *vaddr,
+ u64 *fpm_addr);
+void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo);
+void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, unsigned long *flags);
+void irdma_pble_release_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, unsigned long *flags);
+void irdma_pble_free_paged_mem(struct irdma_chunk *chunk);
+enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk,
+ int pg_cnt);
+#endif /* IRDMA_PBLE_H */
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH RFC] net: bridge: don't flood known multicast traffic when snooping is enabled
From: Linus Lüssing @ 2019-02-15 17:13 UTC (permalink / raw)
To: Nikolay Aleksandrov
Cc: netdev, roopa, wkok, anuradhak, bridge, davem, stephen
In-Reply-To: <20190215130427.29824-1-nikolay@cumulusnetworks.com>
On Fri, Feb 15, 2019 at 03:04:27PM +0200, Nikolay Aleksandrov wrote:
> Every user would expect to have traffic forwarded only to the configured
> mdb destination when snooping is enabled, instead now to get that one
> needs to enable both snooping and querier. Enabling querier on all
> switches could be problematic and is not a good solution,
There is no need to set the querier on all snooping switches.
br_multicast_querier_exists() checks if a querier exists on the
link in general, not if this particular host/bridge is a querier.
> for example as summarized by our multicast experts:
> "every switch would send an IGMP query
What? RFC3810, section 7.1 says:
"If it is the case, a querier election mechanism (described in
section 7.6.2) is used to elect a single multicast router to be
in Querier state. [...] Nevertheless, it is only the [elected] Querier
that sends periodical or triggered query messages on the subnet."
> for any random multicast traffic it
> received across the entire domain and it would send it forever as long as a
> host exists wanting that stream even if it has no downstream/directly
> connected receivers"
Queries are not sent for "any random multicast traffic", but
either periodically (general query) or in response to changes on
multicast address listener state (multicast address specific query).
More precisely, if a host leaves a group and sends an IGMPv3/MLDv2 report
or IGMPv2/MLDv1 "Leave Group"/"Done" message for that.
> Sending as an RFC to get the discussion going, but I'm strongly for
> removing this behaviour and would like to send this patch officially.
While reading the code I'm getting confused with the mrouters_only
flag again... (if I remember correctly it never did what its name
implied) I'd have to test / play with your change to check,
but maybe you have tested these scenarios already:
What happens if:
- no querier exists on the link
- you have added static MDB entries from userspace
=> will only ports with statically configured MDB entries receive
multicast traffic? what happens to other ports?
=> with no queries, those other ports will stay rather silent,
they will not send any reports
=> do they miss multicast traffic / will IPv6 (ND) break for
them?
And what happens if:
- no querier exists on the link
- one port gets an unsolicited MLD report, i.e. because a host has just
started to listen to a particular multicast address
=> will only this port receive multicast traffic? what happens to
other ports that have listeners for the same multicast group?
(and what currently confuses me while reading the code is what happens if
- a querier exists
- but no listener for a particular IPv6 group / no mdst
- for IPv6 link-local multicast traffic (so mrouters_only = 0?)
=> will this result in always flooding multicast traffic for
this particular IPv6 link-local multicast group to all ports?
=> reading the code it seems like, but I had remembered it
differently; for IPv4 this makes sense, as IGMP is not
mandatory for link-local addresses, however for IPv6 this
seems unnecessary, dropping should be the correct approach
if an MLD querier exists)
Have you done some tests with this change yet, Nikolay?
Regards, Linus
^ permalink raw reply
* [RFC v1 10/19] RDMA/irdma: Add connection manager
From: Shiraz Saleem @ 2019-02-15 17:10 UTC (permalink / raw)
To: dledford, jgg, davem
Cc: linux-rdma, netdev, mustafa.ismail, jeffrey.t.kirsher,
Shiraz Saleem
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
From: Mustafa Ismail <mustafa.ismail@intel.com>
Add connection management (CM) implementation for
iWARP including accept, reject, connect, create_listen,
destroy_listen and CM utility functions
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
---
drivers/infiniband/hw/irdma/cm.c | 4622 ++++++++++++++++++++++++++++++++++++++
drivers/infiniband/hw/irdma/cm.h | 424 ++++
2 files changed, 5046 insertions(+)
create mode 100644 drivers/infiniband/hw/irdma/cm.c
create mode 100644 drivers/infiniband/hw/irdma/cm.h
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
new file mode 100644
index 0000000..b194087
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -0,0 +1,4622 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/highmem.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/flow.h>
+#include <net/secure_seq.h>
+
+#include "main.h"
+#include "trace.h"
+
+/*
+ * Forward declarations of static helpers so they can be called before
+ * the point where they are defined.
+ */
+static void irdma_rem_ref_cm_node(struct irdma_cm_node *);
+static void irdma_cm_post_event(struct irdma_cm_event *event);
+static void irdma_disconnect_worker(struct work_struct *work);
+
+/**
+ * irdma_free_sqbuf - drop a reference on a puda buffer
+ * @vsi: VSI whose ILQ resource the buffer is returned to
+ * @bufp: puda buffer to release, passed as an opaque pointer
+ *
+ * Decrements the buffer refcount and returns the buffer to the ILQ
+ * buffer pool once the count reaches zero.
+ */
+void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp)
+{
+	struct irdma_puda_buf *sqbuf = bufp;
+
+	/* other holders remain, nothing more to do */
+	if (atomic_dec_return(&sqbuf->refcount) != 0)
+		return;
+
+	irdma_puda_ret_bufpool(vsi->ilq, sqbuf);
+}
+
+/**
+ * irdma_derive_hw_ird_setting - Calculate IRD
+ * @cm_ird: IRD of connection's node
+ *
+ * Rounds the connection IRD up to a power of two and maps it onto the
+ * encoding used by the ird_size field of qp_ctx. Values that do not
+ * match any known setting (including 0 and 2) encode as 0.
+ *
+ * Return: encoded ird_size value (0..4)
+ */
+u8 irdma_derive_hw_ird_setting(u16 cm_ird)
+{
+	unsigned long hw_ird = 0;
+
+	if (cm_ird)
+		hw_ird = roundup_pow_of_two(cm_ird);
+
+	/* neighbouring powers of two share one HW encoding */
+	switch (hw_ird) {
+	case IRDMA_HW_IRD_SETTING_128:
+		return 4;
+	case IRDMA_HW_IRD_SETTING_64:
+		return 3;
+	case IRDMA_HW_IRD_SETTING_32:
+	case IRDMA_HW_IRD_SETTING_16:
+		return 2;
+	case IRDMA_HW_IRD_SETTING_8:
+	case IRDMA_HW_IRD_SETTING_4:
+		return 1;
+	case IRDMA_HW_IRD_SETTING_2:
+	default:
+		return 0;
+	}
+}
+
+/**
+ * irdma_record_ird_ord - Record IRD/ORD passed in
+ * @cm_node: connection's node
+ * @conn_ird: connection IRD
+ * @conn_ord: connection ORD
+ *
+ * Clamps the requested IRD/ORD to the device limits and stores the
+ * result in @cm_node. A zero ORD is raised to 1 when the node uses
+ * SEND_RDMA_READ_ZERO.
+ */
+static void irdma_record_ird_ord(struct irdma_cm_node *cm_node,
+				 u32 conn_ird,
+				 u32 conn_ord)
+{
+	if (conn_ird > cm_node->dev->hw_attrs.max_hw_ird)
+		conn_ird = cm_node->dev->hw_attrs.max_hw_ird;
+
+	if (conn_ord > cm_node->dev->hw_attrs.max_hw_ord)
+		conn_ord = cm_node->dev->hw_attrs.max_hw_ord;
+	else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
+		conn_ord = 1;
+
+	/*
+	 * Always record the (possibly clamped) values; in-range requests
+	 * must be stored too, not only the ones that exceeded the limits.
+	 */
+	cm_node->ird_size = conn_ird;
+	cm_node->ord_size = conn_ord;
+}
+
+/**
+ * irdma_copy_ip_ntohl - copy IP address from network to host
+ * @dst: IP address in host order
+ * @src: IP address in network order (big endian)
+ *
+ * Always converts four 32-bit words, so both buffers must hold at
+ * least four words.
+ */
+void irdma_copy_ip_ntohl(u32 *dst, __be32 *src)
+{
+	int i;
+
+	for (i = 0; i < 4; i++)
+		dst[i] = ntohl(src[i]);
+}
+
+/**
+ * irdma_copy_ip_htonl - copy IP address from host to network order
+ * @dst: IP address in network order (big endian)
+ * @src: IP address in host order
+ *
+ * Always converts four 32-bit words, so both buffers must hold at
+ * least four words.
+ */
+void irdma_copy_ip_htonl(__be32 *dst, u32 *src)
+{
+	int i;
+
+	for (i = 0; i < 4; i++)
+		dst[i] = htonl(src[i]);
+}
+
+/**
+ * irdma_get_addr_info - snapshot a node's addressing info
+ * @cm_node: contains ip/tcp info
+ * @cm_info: to get a copy of the cm_node ip/tcp info
+ *
+ * Copies the address family flag, VLAN id, user priority, local and
+ * remote addresses and ports from @cm_node into @cm_info.
+ */
+static void irdma_get_addr_info(struct irdma_cm_node *cm_node,
+				struct irdma_cm_info *cm_info)
+{
+	/* local endpoint */
+	memcpy(cm_info->loc_addr, cm_node->loc_addr,
+	       sizeof(cm_info->loc_addr));
+	cm_info->loc_port = cm_node->loc_port;
+
+	/* remote endpoint */
+	memcpy(cm_info->rem_addr, cm_node->rem_addr,
+	       sizeof(cm_info->rem_addr));
+	cm_info->rem_port = cm_node->rem_port;
+
+	/* link level attributes */
+	cm_info->ipv4 = cm_node->ipv4;
+	cm_info->vlan_id = cm_node->vlan_id;
+	cm_info->user_pri = cm_node->user_pri;
+}
+
+/**
+ * irdma_fill_sockaddr4 - fill in addr info for IPv4 connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ *
+ * Writes the node's local/remote port and first address word into the
+ * event's local_addr/remote_addr as AF_INET socket addresses, in
+ * network byte order.
+ */
+static inline void irdma_fill_sockaddr4(struct irdma_cm_node *cm_node,
+					struct iw_cm_event *event)
+{
+	struct sockaddr_in *local = (struct sockaddr_in *)&event->local_addr;
+	struct sockaddr_in *remote = (struct sockaddr_in *)&event->remote_addr;
+
+	local->sin_family = AF_INET;
+	local->sin_port = htons(cm_node->loc_port);
+	local->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
+
+	remote->sin_family = AF_INET;
+	remote->sin_port = htons(cm_node->rem_port);
+	remote->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
+}
+
+/**
+ * irdma_fill_sockaddr6 - fill in addr info for IPv6 connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ *
+ * Writes the node's local/remote port and four-word address into the
+ * event's local_addr/remote_addr as AF_INET6 socket addresses, in
+ * network byte order.
+ */
+static inline void irdma_fill_sockaddr6(struct irdma_cm_node *cm_node,
+					struct iw_cm_event *event)
+{
+	struct sockaddr_in6 *local = (struct sockaddr_in6 *)&event->local_addr;
+	struct sockaddr_in6 *remote =
+		(struct sockaddr_in6 *)&event->remote_addr;
+
+	local->sin6_family = AF_INET6;
+	local->sin6_port = htons(cm_node->loc_port);
+	irdma_copy_ip_htonl(local->sin6_addr.in6_u.u6_addr32,
+			    cm_node->loc_addr);
+
+	remote->sin6_family = AF_INET6;
+	remote->sin6_port = htons(cm_node->rem_port);
+	irdma_copy_ip_htonl(remote->sin6_addr.in6_u.u6_addr32,
+			    cm_node->rem_addr);
+}
+
+/**
+ * irdma_get_cmevent_info - for cm event upcall
+ * @cm_node: connection's node (may be NULL)
+ * @cm_id: upper layers cm struct for the event
+ * @event: upper layer's cm event
+ *
+ * Copies the mapped local/remote addresses from @cm_id into @event.
+ * When a node is supplied, its private data and IRD/ORD are attached
+ * to the event as well.
+ */
+static inline void irdma_get_cmevent_info(struct irdma_cm_node *cm_node,
+					  struct iw_cm_id *cm_id,
+					  struct iw_cm_event *event)
+{
+	memcpy(&event->local_addr, &cm_id->m_local_addr,
+	       sizeof(event->local_addr));
+	memcpy(&event->remote_addr, &cm_id->m_remote_addr,
+	       sizeof(event->remote_addr));
+
+	if (!cm_node)
+		return;
+
+	event->ird = cm_node->ird_size;
+	event->ord = cm_node->ord_size;
+	event->private_data = (void *)cm_node->pdata_buf;
+	event->private_data_len = (u8)cm_node->pdata.size;
+}
+
+/**
+ * irdma_send_cm_event - upcall cm's event handler
+ * @cm_node: connection's node
+ * @cm_id: upper layer's cm info struct
+ * @type: Event type to indicate
+ * @status: status for the event type
+ *
+ * Builds an iw_cm_event for @type and passes it to the event handler
+ * of @cm_id. DISCONNECT and CLOSE events carry no node information.
+ *
+ * Return: the event handler's return value, or -1 for an unsupported
+ * event type.
+ */
+static int irdma_send_cm_event(struct irdma_cm_node *cm_node,
+			       struct iw_cm_id *cm_id,
+			       enum iw_cm_event_type type,
+			       int status)
+{
+	struct iw_cm_event event = {
+		.event = type,
+		.status = status,
+	};
+
+	switch (type) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		trace_irdma_send_cm_event(cm_node, cm_id, type, status,
+					  __builtin_return_address(0));
+		if (cm_node->ipv4)
+			irdma_fill_sockaddr4(cm_node, &event);
+		else
+			irdma_fill_sockaddr6(cm_node, &event);
+		event.provider_data = (void *)cm_node;
+		event.private_data = (void *)cm_node->pdata_buf;
+		event.private_data_len = (u8)cm_node->pdata.size;
+		event.ird = cm_node->ird_size;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		trace_irdma_send_cm_event(cm_node, cm_id, type, status,
+					  __builtin_return_address(0));
+		irdma_get_cmevent_info(cm_node, cm_id, &event);
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		trace_irdma_send_cm_event(cm_node, cm_id, type, status,
+					  __builtin_return_address(0));
+		event.ird = cm_node->ird_size;
+		event.ord = cm_node->ord_size;
+		break;
+	case IW_CM_EVENT_DISCONNECT:
+	case IW_CM_EVENT_CLOSE:
+		/* only type and status are reported for these two */
+		trace_irdma_send_cm_event_no_node(cm_id, type, status,
+						  __builtin_return_address(0));
+		break;
+	default:
+		irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+			    "Unsupported event type received type = %d\n",
+			    type);
+		return -1;
+	}
+
+	return cm_id->event_handler(cm_id, &event);
+}
+
+/**
+ * irdma_create_event - create cm event
+ * @cm_node: connection's node
+ * @type: Event type to generate
+ *
+ * Allocates a CM event, fills it with the node's addresses, ports and
+ * cm_id, and posts it via irdma_cm_post_event().
+ *
+ * Return: the posted event, or NULL when the node has no cm_id or the
+ * allocation fails.
+ */
+static struct irdma_cm_event *irdma_create_event(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type)
+{
+ struct irdma_cm_event *event;
+
+ /* without a cm_id no event is created */
+ if (!cm_node->cm_id)
+ return NULL;
+
+ /* GFP_ATOMIC: the allocation must not sleep */
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+
+ if (!event)
+ return NULL;
+
+ event->type = type;
+ event->cm_node = cm_node;
+ memcpy(event->cm_info.rem_addr, cm_node->rem_addr,
+ sizeof(event->cm_info.rem_addr));
+ memcpy(event->cm_info.loc_addr, cm_node->loc_addr,
+ sizeof(event->cm_info.loc_addr));
+ event->cm_info.rem_port = cm_node->rem_port;
+ event->cm_info.loc_port = cm_node->loc_port;
+ event->cm_info.cm_id = cm_node->cm_id;
+ /*
+ * NOTE(review): "dst" is printed from loc_addr and "src" from rem_addr,
+ * and %pI4 is used regardless of cm_node->ipv4 - confirm the labels and
+ * format are intended.
+ */
+ irdma_debug(cm_node->dev,
+ IRDMA_DEBUG_CM,
+ "node=%p event=%p type=%u dst=%pI4 src=%pI4\n",
+ cm_node,
+ event,
+ type,
+ event->cm_info.loc_addr,
+ event->cm_info.rem_addr);
+ trace_irdma_create_event(cm_node, type, __builtin_return_address(0));
+ irdma_cm_post_event(event);
+
+ /*
+ * NOTE(review): the event has already been posted here; whether the
+ * returned pointer may still be dereferenced depends on
+ * irdma_cm_post_event(), which is not visible in this block.
+ */
+ return event;
+}
+
+/**
+ * irdma_free_retrans_entry - free send entry
+ * @cm_node: connection's node
+ *
+ * Detaches the pending send entry from @cm_node, releases its puda
+ * buffer reference, frees the entry and drops one node reference.
+ * Does nothing when no send entry is pending.
+ */
+static void irdma_free_retrans_entry(struct irdma_cm_node *cm_node)
+{
+	struct irdma_timer_entry *entry = cm_node->send_entry;
+
+	if (!entry)
+		return;
+
+	cm_node->send_entry = NULL;
+	irdma_free_sqbuf(&cm_node->iwdev->vsi, (void *)entry->sqbuf);
+	kfree(entry);
+	atomic_dec(&cm_node->ref_count);
+}
+
+/**
+ * irdma_cleanup_retrans_entry - free send entry with lock
+ * @cm_node: connection's node
+ *
+ * Calls irdma_free_retrans_entry() while holding the node's
+ * retrans_list_lock with interrupts disabled.
+ */
+static void irdma_cleanup_retrans_entry(struct irdma_cm_node *cm_node)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&cm_node->retrans_list_lock, irq_flags);
+	irdma_free_retrans_entry(cm_node);
+	spin_unlock_irqrestore(&cm_node->retrans_list_lock, irq_flags);
+}
+
+/**
+ * irdma_form_ah_cm_frame - get a free packet and build frame with address handle
+ * @cm_node: connection's node info to use in frame
+ * @options: pointer to options info (may be NULL)
+ * @hdr: pointer mpa header (may be NULL)
+ * @pdata: pointer to private data (may be NULL)
+ * @flags: TCP flags to set (SET_ACK, SET_SYN, SET_FIN, SET_RST)
+ *
+ * Takes a buffer from the ILQ pool and writes a TCP header followed by
+ * the optional TCP options, MPA header and private data. No Ethernet or
+ * IP header is written here; the buffer carries the node's address
+ * handle index instead. The node's local sequence number is advanced
+ * while the frame is built.
+ *
+ * Return: the filled buffer with refcount 1, or NULL when the node has
+ * no valid address handle or no buffer is available.
+ */
+static struct irdma_puda_buf *irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_kmem_info *pdata,
+ u8 flags)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ u8 *buf;
+ struct tcphdr *tcph;
+ u16 pktsize;
+ u32 opts_len = 0;
+ u32 pd_len = 0;
+ u32 hdr_len = 0;
+
+ if (!cm_node->ah || !cm_node->ah->ah_info.ah_valid) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR, "AH invalid\n");
+ return NULL;
+ }
+
+ sqbuf = irdma_puda_get_bufpool(vsi->ilq);
+ if (!sqbuf) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR, "SQ buf NULL\n");
+ return NULL;
+ }
+
+ sqbuf->ah_id = cm_node->ah->ah_info.ah_idx;
+ buf = sqbuf->mem.va;
+ if (options)
+ opts_len = (u32)options->size;
+
+ if (hdr)
+ hdr_len = hdr->size;
+
+ if (pdata)
+ pd_len = pdata->size;
+
+ /* the frame is TCP header + options + MPA header + private data */
+ pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len;
+
+ memset(buf, 0, pktsize);
+
+ sqbuf->totallen = pktsize;
+ sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+ sqbuf->scratch = (void *)cm_node;
+
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else {
+ tcph->ack_seq = 0;
+ }
+
+ /* a SYN advances the sequence number by one, otherwise by the payload size */
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+ }
+
+ /* a FIN advances the sequence number by one more */
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
+ /* data offset is in 32-bit words, rounded up; tcphlen is recomputed from it */
+ tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+ sqbuf->tcphlen = tcph->doff << 2;
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+
+ if (opts_len) {
+ memcpy(buf, options->addr, opts_len);
+ buf += opts_len;
+ }
+
+ if (hdr_len) {
+ memcpy(buf, hdr->addr, hdr_len);
+ buf += hdr_len;
+ }
+
+ if (pdata && pdata->addr)
+ memcpy(buf, pdata->addr, pdata->size);
+
+ atomic_set(&sqbuf->refcount, 1);
+
+ irdma_debug_buf(vsi->dev,
+ IRDMA_DEBUG_ILQ,
+ "TRANSMIT ILQ BUFFER",
+ sqbuf->mem.va,
+ sqbuf->totallen);
+
+ return sqbuf;
+}
+
+/**
+ * irdma_form_uda_cm_frame - get a free packet and build frame full tcpip packet
+ * @cm_node: connection's node info to use in frame
+ * @options: pointer to options info (may be NULL)
+ * @hdr: pointer mpa header (may be NULL)
+ * @pdata: pointer to private data (may be NULL)
+ * @flags: TCP flags to set (SET_ACK, SET_SYN, SET_FIN, SET_RST)
+ *
+ * Takes a buffer from the ILQ pool and writes a complete frame:
+ * Ethernet header (with a VLAN tag when the node has a valid VLAN id),
+ * IPv4 or IPv6 header, TCP header, then the optional TCP options, MPA
+ * header and private data. The node's local sequence number is advanced
+ * while the frame is built.
+ *
+ * Return: the filled buffer with refcount 1, or NULL when no buffer is
+ * available.
+ */
+static struct irdma_puda_buf *irdma_form_uda_cm_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_kmem_info *pdata,
+ u8 flags)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ u8 *buf;
+
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct ethhdr *ethh;
+ u16 pktsize;
+ u16 eth_hlen = ETH_HLEN;
+ u32 opts_len = 0;
+ u32 pd_len = 0;
+ u32 hdr_len = 0;
+
+ u16 vtag;
+
+ sqbuf = irdma_puda_get_bufpool(vsi->ilq);
+ if (!sqbuf)
+ return NULL;
+
+ buf = sqbuf->mem.va;
+
+ if (options)
+ opts_len = (u32)options->size;
+
+ if (hdr)
+ hdr_len = hdr->size;
+
+ if (pdata)
+ pd_len = pdata->size;
+
+ /* a vlan_id within VLAN_VID_MASK means the frame is tagged: 4 extra bytes */
+ if (cm_node->vlan_id <= VLAN_VID_MASK)
+ eth_hlen += 4;
+
+ /* pktsize counts everything after the Ethernet header */
+ if (cm_node->ipv4)
+ pktsize = sizeof(*iph) + sizeof(*tcph);
+ else
+ pktsize = sizeof(*ip6h) + sizeof(*tcph);
+ pktsize += opts_len + hdr_len + pd_len;
+
+ memset(buf, 0, eth_hlen + pktsize);
+
+ sqbuf->totallen = pktsize + eth_hlen;
+ sqbuf->maclen = eth_hlen;
+ sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+ sqbuf->scratch = (void *)cm_node;
+
+ ethh = (struct ethhdr *)buf;
+ buf += eth_hlen;
+
+ if (cm_node->ipv4) {
+ sqbuf->ipv4 = true;
+
+ iph = (struct iphdr *)buf;
+ buf += sizeof(*iph);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id <= VLAN_VID_MASK) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto =
+ htons(ETH_P_8021Q);
+ /* TCI = user priority in the upper bits, VLAN id in the lower bits */
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT)
+ | cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
+
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto =
+ htons(ETH_P_IP);
+ } else {
+ ethh->h_proto = htons(ETH_P_IP);
+ }
+
+ iph->version = IPVERSION;
+ iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
+ iph->tos = cm_node->tos;
+ iph->tot_len = htons(pktsize);
+ iph->id = htons(++cm_node->tcp_cntxt.loc_id);
+
+ /* 0x4000 is the IPv4 don't-fragment flag; TTL is 64 */
+ iph->frag_off = htons(0x4000);
+ iph->ttl = 0x40;
+ iph->protocol = IPPROTO_TCP;
+ iph->saddr = htonl(cm_node->loc_addr[0]);
+ iph->daddr = htonl(cm_node->rem_addr[0]);
+ } else {
+ sqbuf->ipv4 = false;
+ ip6h = (struct ipv6hdr *)buf;
+ buf += sizeof(*ip6h);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id <= VLAN_VID_MASK) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto =
+ htons(ETH_P_8021Q);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT)
+ | cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto =
+ htons(ETH_P_IPV6);
+ } else {
+ ethh->h_proto = htons(ETH_P_IPV6);
+ }
+ ip6h->version = 6;
+ /* the tos byte is split across the priority field and the first flow label byte */
+ ip6h->priority = cm_node->tos >> 4;
+ ip6h->flow_lbl[0] = cm_node->tos << 4;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->payload_len = htons(pktsize - sizeof(*ip6h));
+ /* next header 6 is TCP */
+ ip6h->nexthdr = 6;
+ ip6h->hop_limit = 128;
+ irdma_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
+ cm_node->loc_addr);
+ irdma_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
+ cm_node->rem_addr);
+ }
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else {
+ tcph->ack_seq = 0;
+ }
+
+ /* a SYN advances the sequence number by one, otherwise by the payload size */
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+ }
+
+ /* a FIN advances the sequence number by one more */
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
+ /* data offset is in 32-bit words, rounded up; tcphlen is recomputed from it */
+ tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+ sqbuf->tcphlen = tcph->doff << 2;
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+
+ if (opts_len) {
+ memcpy(buf, options->addr, opts_len);
+ buf += opts_len;
+ }
+
+ if (hdr_len) {
+ memcpy(buf, hdr->addr, hdr_len);
+ buf += hdr_len;
+ }
+
+ if (pdata && pdata->addr)
+ memcpy(buf, pdata->addr, pdata->size);
+
+ atomic_set(&sqbuf->refcount, 1);
+
+ irdma_debug_buf(vsi->dev,
+ IRDMA_DEBUG_ILQ,
+ "TRANSMIT ILQ BUFFER",
+ sqbuf->mem.va,
+ sqbuf->totallen);
+
+ return sqbuf;
+}
+
+/**
+ * irdma_send_reset - Send RST packet
+ * @cm_node: connection's node
+ *
+ * Builds a RST|ACK frame through the core's form_cm_frame hook and
+ * schedules it as a one-shot send with close_when_complete set.
+ *
+ * Return: result of irdma_schedule_cm_timer(), or -1 when no frame
+ * could be built.
+ */
+int irdma_send_reset(struct irdma_cm_node *cm_node)
+{
+	int tcp_flags = SET_RST | SET_ACK;
+	struct irdma_puda_buf *rst_buf;
+
+	trace_irdma_send_reset(cm_node, 0, __builtin_return_address(0));
+
+	rst_buf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+						  tcp_flags);
+	if (!rst_buf)
+		return -1;
+
+	return irdma_schedule_cm_timer(cm_node, rst_buf,
+				       IRDMA_TIMER_TYPE_SEND, 0, 1);
+}
+
+/**
+ * irdma_active_open_err - send event for active side cm error
+ * @cm_node: connection's node
+ * @reset: Flag to send reset or not
+ *
+ * Drops any pending retransmit, counts the connect error, optionally
+ * sends a reset (taking an extra node reference first), marks the node
+ * closed and queues an ABORTED event.
+ */
+static void irdma_active_open_err(struct irdma_cm_node *cm_node, bool reset)
+{
+	trace_irdma_active_open_err(cm_node, reset,
+				    __builtin_return_address(0));
+
+	irdma_cleanup_retrans_entry(cm_node);
+	cm_node->cm_core->stats_connect_errs++;
+
+	if (reset) {
+		irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+			    "cm_node=%p state=%d\n", cm_node, cm_node->state);
+		atomic_inc(&cm_node->ref_count);
+		irdma_send_reset(cm_node);
+	}
+
+	cm_node->state = IRDMA_CM_STATE_CLOSED;
+	irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+}
+
+/**
+ * irdma_passive_open_err - handle passive side cm error
+ * @cm_node: connection's node
+ * @reset: send reset or just free cm_node
+ *
+ * Drops any pending retransmit, counts the passive error and marks the
+ * node closed. Then either sends a reset or drops a node reference.
+ */
+static void irdma_passive_open_err(struct irdma_cm_node *cm_node, bool reset)
+{
+	irdma_cleanup_retrans_entry(cm_node);
+	cm_node->cm_core->stats_passive_errs++;
+	cm_node->state = IRDMA_CM_STATE_CLOSED;
+
+	irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+		    "cm_node=%p state =%d\n", cm_node, cm_node->state);
+	trace_irdma_passive_open_err(cm_node, reset,
+				     __builtin_return_address(0));
+
+	if (!reset) {
+		irdma_rem_ref_cm_node(cm_node);
+		return;
+	}
+
+	irdma_send_reset(cm_node);
+}
+
+/**
+ * irdma_event_connect_error - to create connect error event
+ * @event: cm information for connect event
+ *
+ * Unlinks the QP from the cm_id, reports a CONNECT_REPLY with
+ * -ECONNRESET to the upper layer, then drops the cm_id and node
+ * references. Does nothing when the node has no cm_id or the cm_id has
+ * no usable QP attached.
+ */
+static void irdma_event_connect_error(struct irdma_cm_event *event)
+{
+	struct irdma_cm_node *cm_node = event->cm_node;
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	struct irdma_qp *iwqp;
+
+	if (!cm_id)
+		return;
+
+	iwqp = cm_id->provider_data;
+	if (!iwqp || !iwqp->iwdev)
+		return;
+
+	/* break the QP <-> cm_id association before the upcall */
+	iwqp->cm_id = NULL;
+	cm_id->provider_data = NULL;
+
+	irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
+			    -ECONNRESET);
+	cm_id->rem_ref(cm_id);
+	irdma_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * irdma_process_options - process options from TCP header
+ * @cm_node: connection's node
+ * @optionsloc: point to start of options
+ * @optionsize: size of all options
+ * @syn_pkt: flag if syn packet
+ *
+ * Walks the TCP option list and records the MSS and window scale in
+ * the node's TCP context. The option bytes come from a received packet,
+ * so every option length is validated against the bytes that remain
+ * before it is used.
+ *
+ * Return: 0 on success, -EINVAL for a malformed option or an
+ * unacceptable MSS.
+ */
+static int irdma_process_options(struct irdma_cm_node *cm_node,
+				 u8 *optionsloc,
+				 u32 optionsize,
+				 u32 syn_pkt)
+{
+	u32 tmp;
+	u32 offset = 0;
+	u32 remaining;
+	u32 optlen;
+	union all_known_options *all_options;
+	char got_mss_option = 0;
+
+	while (offset < optionsize) {
+		all_options = (union all_known_options *)(optionsloc + offset);
+
+		/* EOL and NOP are single-byte options without a length field */
+		if (all_options->base.optionnum == OPTION_NUM_EOL)
+			break;
+		if (all_options->base.optionnum == OPTION_NUM_NONE) {
+			offset += 1;
+			continue;
+		}
+
+		/*
+		 * All other options are kind + length + data. A length below
+		 * 2 would stall the loop (0) or misparse the list (1), and a
+		 * length past the end would read beyond the option area.
+		 */
+		remaining = optionsize - offset;
+		if (remaining < 2)
+			return -EINVAL;
+		optlen = all_options->base.len;
+		if (optlen < 2 || optlen > remaining)
+			return -EINVAL;
+
+		switch (all_options->base.optionnum) {
+		case OPTION_NUM_MSS:
+			irdma_debug(cm_node->dev,
+				    IRDMA_DEBUG_CM,
+				    "MSS Length: %d Offset: %d Size: %d\n",
+				    all_options->mss.len,
+				    offset,
+				    optionsize);
+			got_mss_option = 1;
+			if (all_options->mss.len != 4)
+				return -EINVAL;
+			tmp = ntohs(all_options->mss.mss);
+			if ((cm_node->ipv4 &&
+			     (tmp + IRDMA_MTU_TO_MSS_IPV4) < IRDMA_MIN_MTU_IPV4) ||
+			    (!cm_node->ipv4 &&
+			     (tmp + IRDMA_MTU_TO_MSS_IPV6) < IRDMA_MIN_MTU_IPV6))
+				return -EINVAL;
+			if (tmp < cm_node->tcp_cntxt.mss)
+				cm_node->tcp_cntxt.mss = tmp;
+			break;
+		case OPTION_NUM_WINDOW_SCALE:
+			/* the shift count is the third byte of the option */
+			if (optlen < 3)
+				return -EINVAL;
+			/*
+			 * The value is later used as a shift count on the
+			 * advertised window, so cap it at the protocol maximum.
+			 */
+			cm_node->tcp_cntxt.snd_wscale =
+				min_t(u8, all_options->windowscale.shiftcount,
+				      TCP_MAX_WSCALE);
+			break;
+		default:
+			irdma_debug(cm_node->dev,
+				    IRDMA_DEBUG_CM,
+				    "Unsupported TCP Option: %x\n",
+				    all_options->base.optionnum);
+			break;
+		}
+		offset += optlen;
+	}
+	if (!got_mss_option && syn_pkt)
+		cm_node->tcp_cntxt.mss = IRDMA_CM_DEFAULT_MSS;
+
+	return 0;
+}
+
+/**
+ * irdma_handle_tcp_options - setup TCP context info after parsing TCP options
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ * @optionsize: size of options rcvd
+ * @passive: active or passive flag
+ *
+ * Parses the options that follow @tcph, if any. On a parse failure the
+ * matching open-error handler is run with reset requested. On success
+ * the send window is updated from the header's window field scaled by
+ * the negotiated shift count.
+ *
+ * Return: 0 on success, -1 when option parsing failed.
+ */
+static int irdma_handle_tcp_options(struct irdma_cm_node *cm_node,
+				    struct tcphdr *tcph,
+				    int optionsize,
+				    int passive)
+{
+	/* options start right after the fixed TCP header */
+	u8 *opts = (u8 *)&tcph[1];
+
+	if (optionsize &&
+	    irdma_process_options(cm_node, opts, optionsize, (u32)tcph->syn)) {
+		irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+			    "Node %p, Sending Reset\n", cm_node);
+		if (passive)
+			irdma_passive_open_err(cm_node, true);
+		else
+			irdma_active_open_err(cm_node, true);
+		return -1;
+	}
+
+	cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
+				     cm_node->tcp_cntxt.snd_wscale;
+
+	/* track the largest send window seen so far */
+	if (cm_node->tcp_cntxt.max_snd_wnd < cm_node->tcp_cntxt.snd_wnd)
+		cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+
+	return 0;
+}
+
+/**
+ * irdma_build_mpa_v1 - build a MPA V1 frame
+ * @cm_node: connection's node
+ * @start_addr: buffer start address
+ * @mpa_key: MPA_KEY_REQUEST or MPA_KEY_REPLY
+ *
+ * Writes the MPA key matching @mpa_key (left untouched for any other
+ * value), then sets the CRC flag, the node's MPA revision and the
+ * private data length.
+ */
+static void irdma_build_mpa_v1(struct irdma_cm_node *cm_node,
+			       void *start_addr,
+			       u8 mpa_key)
+{
+	struct ietf_mpa_v1 *frame = start_addr;
+
+	if (mpa_key == MPA_KEY_REQUEST)
+		memcpy(frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+	else if (mpa_key == MPA_KEY_REPLY)
+		memcpy(frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+
+	frame->flags = IETF_MPA_FLAGS_CRC;
+	frame->rev = cm_node->mpa_frame_rev;
+	frame->priv_data_len = htons(cm_node->pdata.size);
+}
+
+/**
+ * irdma_build_mpa_v2 - build a MPA V2 frame
+ * @cm_node: connection's node
+ * @start_addr: buffer start address
+ * @mpa_key: MPA_KEY_REQUEST or MPA_KEY_REPLY
+ *
+ * Builds the V1 part of the frame, marks it as V2, grows the private
+ * data length by the RTR message size and fills in the RTR message
+ * with the IRD/ORD control words.
+ */
+static void irdma_build_mpa_v2(struct irdma_cm_node *cm_node,
+			       void *start_addr,
+			       u8 mpa_key)
+{
+	struct ietf_mpa_v2 *frame = start_addr;
+	struct ietf_rtr_msg *rtr = &frame->rtr_msg;
+	u16 ctrl_ird = IETF_NO_IRD_ORD;
+	u16 ctrl_ord = IETF_NO_IRD_ORD;
+
+	/* start from a V1 header, then extend it */
+	irdma_build_mpa_v1(cm_node, start_addr, mpa_key);
+	frame->flags |= IETF_MPA_V2_FLAG;
+	frame->priv_data_len =
+		cpu_to_be16(be16_to_cpu(frame->priv_data_len) +
+			    IETF_RTR_MSG_SIZE);
+
+	/* advertise real sizes only when IRD/ORD negotiation is in use */
+	if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
+		if (cm_node->ird_size <= IETF_NO_IRD_ORD)
+			ctrl_ird = cm_node->ird_size;
+		if (cm_node->ord_size <= IETF_NO_IRD_ORD)
+			ctrl_ord = cm_node->ord_size;
+	}
+	ctrl_ird |= IETF_PEER_TO_PEER;
+
+	if (mpa_key == MPA_KEY_REQUEST) {
+		/* a request offers both RDMA0 operations */
+		ctrl_ord |= IETF_RDMA0_WRITE;
+		ctrl_ord |= IETF_RDMA0_READ;
+	} else if (mpa_key == MPA_KEY_REPLY) {
+		/* a reply names the one operation the node will send */
+		if (cm_node->send_rdma0_op == SEND_RDMA_WRITE_ZERO)
+			ctrl_ord |= IETF_RDMA0_WRITE;
+		else if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
+			ctrl_ord |= IETF_RDMA0_READ;
+	}
+
+	rtr->ctrl_ird = htons(ctrl_ird);
+	rtr->ctrl_ord = htons(ctrl_ord);
+}
+
+/**
+ * irdma_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
+ * @cm_node: connection's node
+ * @mpa: mpa: data buffer
+ * @mpa_key: MPA_KEY_REQUEST or MPA_KEY_REPLY
+ *
+ * Return: size of the MPA header that was written, or 0 when the node's
+ * MPA revision is neither V1 nor V2.
+ */
+static int irdma_cm_build_mpa_frame(struct irdma_cm_node *cm_node,
+				    struct irdma_kmem_info *mpa,
+				    u8 mpa_key)
+{
+	switch (cm_node->mpa_frame_rev) {
+	case IETF_MPA_V1:
+		irdma_build_mpa_v1(cm_node, mpa->addr, mpa_key);
+		return sizeof(struct ietf_mpa_v1);
+	case IETF_MPA_V2:
+		irdma_build_mpa_v2(cm_node, mpa->addr, mpa_key);
+		return sizeof(struct ietf_mpa_v2);
+	default:
+		return 0;
+	}
+}
+
+/**
+ * irdma_send_mpa_request - active node send mpa request to passive node
+ * @cm_node: connection's node
+ *
+ * Builds the MPA request header in the node's mpa_frame, wraps it and
+ * the node's private data in an ACK frame and schedules it for sending
+ * with retransmission enabled.
+ *
+ * Return: result of irdma_schedule_cm_timer(), or -1 on a NULL node, an
+ * empty MPA header or a frame build failure.
+ */
+static int irdma_send_mpa_request(struct irdma_cm_node *cm_node)
+{
+	struct irdma_kmem_info *mpa_hdr;
+	struct irdma_puda_buf *sqbuf;
+
+	if (!cm_node) {
+		irdma_pr_err("cm_node == NULL\n");
+		return -1;
+	}
+
+	mpa_hdr = &cm_node->mpa_hdr;
+	mpa_hdr->addr = &cm_node->mpa_frame;
+	mpa_hdr->size = irdma_cm_build_mpa_frame(cm_node, mpa_hdr,
+						 MPA_KEY_REQUEST);
+	if (!mpa_hdr->size) {
+		irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR, "mpa size = %d\n",
+			    mpa_hdr->size);
+		return -1;
+	}
+
+	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, mpa_hdr,
+						&cm_node->pdata, SET_ACK);
+	if (!sqbuf)
+		return -1;
+
+	return irdma_schedule_cm_timer(cm_node, sqbuf,
+				       IRDMA_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * irdma_send_mpa_reject - build and send an MPA reject frame
+ * @cm_node: connection's node
+ * @pdata: reject data for connection
+ * @plen: length of reject data
+ *
+ * Builds an MPA reply header with the reject flag set, sends it with
+ * the caller's private data in an ACK|FIN frame and moves the node to
+ * FIN_WAIT1.
+ *
+ * Return: result of irdma_schedule_cm_timer(), or -ENOMEM when no frame
+ * could be built.
+ */
+static int irdma_send_mpa_reject(struct irdma_cm_node *cm_node,
+				 const void *pdata,
+				 u8 plen)
+{
+	struct irdma_kmem_info *mpa_hdr = &cm_node->mpa_hdr;
+	struct irdma_kmem_info reject_data;
+	struct irdma_puda_buf *sqbuf;
+
+	mpa_hdr->addr = &cm_node->mpa_frame;
+	mpa_hdr->size = irdma_cm_build_mpa_frame(cm_node, mpa_hdr,
+						 MPA_KEY_REPLY);
+	cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
+
+	reject_data.addr = (void *)pdata;
+	reject_data.size = plen;
+
+	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, mpa_hdr,
+						&reject_data,
+						SET_ACK | SET_FIN);
+	if (!sqbuf)
+		return -ENOMEM;
+
+	cm_node->state = IRDMA_CM_STATE_FIN_WAIT1;
+
+	return irdma_schedule_cm_timer(cm_node, sqbuf,
+				       IRDMA_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * irdma_negotiate_mpa_v2_ird_ord - negotiate MPAv2 IRD/ORD
+ * @cm_node: connection's node
+ * @buf: start of the received MPA v2 frame
+ *
+ * Reads the RTR message of the peer's MPA v2 frame, adjusts the node's
+ * IRD/ORD accordingly and selects the RDMA0 operation (read or write)
+ * the node will send.
+ *
+ * Return: 0 on success, -1 when the peer did not set the peer-to-peer
+ * flag, the IRD/ORD values cannot be satisfied, or no supported RDMA0
+ * operation was offered.
+ */
+static int irdma_negotiate_mpa_v2_ird_ord(struct irdma_cm_node *cm_node, u8 *buf)
+{
+ struct ietf_mpa_v2 *mpa_v2_frame;
+ struct ietf_rtr_msg *rtr_msg;
+ u16 ird_size;
+ u16 ord_size;
+ u16 ctrl_ord;
+ u16 ctrl_ird;
+
+ mpa_v2_frame = (struct ietf_mpa_v2 *)buf;
+ rtr_msg = &mpa_v2_frame->rtr_msg;
+
+ /* parse rtr message */
+ ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+ ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+ /* the size occupies the bits covered by IETF_NO_IRD_ORD; the rest are flags */
+ ird_size = ctrl_ird & IETF_NO_IRD_ORD;
+ ord_size = ctrl_ord & IETF_NO_IRD_ORD;
+
+ if (!(ctrl_ird & IETF_PEER_TO_PEER))
+ return -1;
+
+ /* peer opted out of IRD/ORD negotiation: remember that and skip sizing */
+ if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
+ cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
+ goto negotiate_done;
+ }
+
+ if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
+ /* responder */
+ if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
+ cm_node->ird_size = 1;
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+ } else {
+ /* initiator */
+ if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
+ /* Remote peer doesn't support RDMA0_READ */
+ return -1;
+
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+
+ if (cm_node->ird_size < ord_size)
+ /* no resources available */
+ return -1;
+ }
+
+negotiate_done:
+ /* RDMA0 read takes precedence when both operations are offered */
+ if (ctrl_ord & IETF_RDMA0_READ)
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ else if (ctrl_ord & IETF_RDMA0_WRITE)
+ cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+ else
+ /* Not supported RDMA0 operation */
+ return -1;
+
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+ "MPAV2 Negotiated ORD: %d, IRD: %d\n",
+ cm_node->ord_size, cm_node->ird_size);
+ trace_irdma_negotiate_mpa_v2(cm_node);
+ return 0;
+}
+
+/**
+ * irdma_parse_mpa - process an IETF MPA frame
+ * @cm_node: connection's node
+ * @buf: start of the received MPA frame
+ * @type: to return accept or reject
+ * @len: Len of mpa buffer
+ *
+ * Validates the MPA header (length, revision, key), runs MPA v2 IRD/ORD
+ * negotiation when applicable and copies the private data into the
+ * node's pdata_buf. *@type is set to IRDMA_MPA_REQUEST_REJECT when the
+ * frame carries the reject flag, otherwise IRDMA_MPA_REQUEST_ACCEPT.
+ *
+ * Return: 0 on success, -1 on any validation or negotiation failure.
+ */
+static int irdma_parse_mpa(struct irdma_cm_node *cm_node,
+ u8 *buf,
+ u32 *type,
+ u32 len)
+{
+ struct ietf_mpa_v1 *mpa_frame;
+ int mpa_hdr_len;
+ int priv_data_len;
+
+ *type = IRDMA_MPA_REQUEST_ACCEPT;
+
+ if (len < sizeof(struct ietf_mpa_v1)) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "ietf buffer small (%x)\n", len);
+ return -1;
+ }
+
+ mpa_frame = (struct ietf_mpa_v1 *)buf;
+ mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+ priv_data_len = ntohs(mpa_frame->priv_data_len);
+
+ if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "large pri_data %d\n", priv_data_len);
+ return -1;
+ }
+
+ if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "unsupported mpa rev = %d\n", mpa_frame->rev);
+ return -1;
+ }
+
+ /* a peer revision newer than the node's own is rejected */
+ if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "rev %d\n", mpa_frame->rev);
+ return -1;
+ }
+
+ /* adopt the peer's (possibly lower) revision */
+ cm_node->mpa_frame_rev = mpa_frame->rev;
+ /* a node that has sent an MPA request expects the reply key, any other the request key */
+ if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ,
+ IETF_MPA_KEY_SIZE)) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "Unexpected MPA Key received\n");
+ return -1;
+ }
+ } else {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP,
+ IETF_MPA_KEY_SIZE)) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "Unexpected MPA Key received\n");
+ return -1;
+ }
+ }
+
+ /*
+ * NOTE(review): this check uses the v1 header size only. For V2 frames
+ * the RTR message is read and mpa_hdr_len grows by IETF_RTR_MSG_SIZE
+ * further down, after this check - confirm that len is guaranteed to
+ * cover the RTR message and the private data copied below.
+ */
+ if (priv_data_len + mpa_hdr_len > len) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "ietf buffer len(%x + %x != %x)\n",
+ priv_data_len, mpa_hdr_len, len);
+ return -1;
+ }
+
+ if (len > IRDMA_MAX_CM_BUF) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "ietf buffer large len = %d\n", len);
+ return -1;
+ }
+
+ switch (mpa_frame->rev) {
+ case IETF_MPA_V2:
+ /* V2 carries an RTR message between the v1 header and the private data */
+ mpa_hdr_len += IETF_RTR_MSG_SIZE;
+ if (irdma_negotiate_mpa_v2_ird_ord(cm_node, buf))
+ return -1;
+ break;
+ case IETF_MPA_V1:
+ default:
+ break;
+ }
+
+ memcpy(cm_node->pdata_buf, buf + mpa_hdr_len, priv_data_len);
+ cm_node->pdata.size = priv_data_len;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
+ *type = IRDMA_MPA_REQUEST_REJECT;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
+ cm_node->snd_mark_en = true;
+
+ return 0;
+}
+
+/**
+ * irdma_schedule_cm_timer - queue a send or close entry and arm the CM timer
+ * @cm_node: connection's node
+ * @sqbuf: buffer to send
+ * @type: if it is send or close
+ * @send_retrans: if rexmits to be done
+ * @close_when_complete: is cm_node to be removed
+ *
+ * For IRDMA_TIMER_TYPE_SEND the buffer is transmitted immediately; when
+ * @send_retrans is 0 the entry is released again right away, otherwise
+ * it stays on the node for retransmission. For IRDMA_TIMER_TYPE_CLOSE
+ * the entry is stored as the node's close entry. The core TCP timer is
+ * armed if it is not already pending.
+ *
+ * note - cm_node needs to be protected before calling this. Encase in:
+ * irdma_rem_ref_cm_node(cm_core, cm_node);
+ * irdma_schedule_cm_timer(...)
+ * atomic_inc(&cm_node->ref_count);
+ * NOTE(review): the sequence above drops the reference before the call
+ * and takes it after, and irdma_rem_ref_cm_node() is declared with a
+ * single argument - confirm the intended order and signature.
+ *
+ * Return: 0 on success, -ENOMEM when the timer entry cannot be
+ * allocated, -EINVAL when a close entry is already pending.
+ */
+int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *sqbuf,
+ enum irdma_timer_type type,
+ int send_retrans,
+ int close_when_complete)
+{
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ struct irdma_timer_entry *new_send;
+ u32 was_timer_set;
+ unsigned long flags;
+
+ new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+ if (!new_send) {
+ /* for close entries sqbuf is not a puda buffer (see irdma_handle_close_entry) */
+ if (type != IRDMA_TIMER_TYPE_CLOSE)
+ irdma_free_sqbuf(vsi, (void *)sqbuf);
+ return -ENOMEM;
+ }
+
+ new_send->retrycount = IRDMA_DEFAULT_RETRYS;
+ new_send->retranscount = IRDMA_DEFAULT_RETRANS;
+ new_send->sqbuf = sqbuf;
+ new_send->timetosend = jiffies;
+ new_send->type = type;
+ new_send->send_retrans = send_retrans;
+ new_send->close_when_complete = close_when_complete;
+
+ if (type == IRDMA_TIMER_TYPE_CLOSE) {
+ /* close entries fire a tenth of a second from now */
+ new_send->timetosend += (HZ / 10);
+ if (cm_node->close_entry) {
+ kfree(new_send);
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "already close entry\n");
+ return -EINVAL;
+ }
+
+ cm_node->close_entry = new_send;
+ }
+
+ if (type == IRDMA_TIMER_TYPE_SEND) {
+ /* the pending send entry holds a node reference */
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ cm_node->send_entry = new_send;
+ atomic_inc(&cm_node->ref_count);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ new_send->timetosend = jiffies + IRDMA_RETRY_TIMEOUT;
+
+ /* extra buffer reference for the transmit itself */
+ atomic_inc(&sqbuf->refcount);
+ irdma_puda_send_buf(vsi->ilq, sqbuf);
+ if (!send_retrans) {
+ /* one-shot send: release the entry now, no timer needed */
+ irdma_cleanup_retrans_entry(cm_node);
+ if (close_when_complete)
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+ }
+
+ /* arm the shared timer only if it is not already pending */
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ was_timer_set = timer_pending(&cm_core->tcp_timer);
+
+ if (!was_timer_set) {
+ cm_core->tcp_timer.expires = new_send->timetosend;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_retrans_expired - Could not rexmit the packet
+ * @cm_node: connection's node
+ *
+ * Moves the node to CLOSED and then cleans up according to the state the
+ * node was in when retransmission ran out.
+ */
+static void irdma_retrans_expired(struct irdma_cm_node *cm_node)
+{
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	enum irdma_cm_node_state prev_state = cm_node->state;
+
+	cm_node->state = IRDMA_CM_STATE_CLOSED;
+
+	/* Passive side never completed, or both ends were closing. */
+	if (prev_state == IRDMA_CM_STATE_SYN_RCVD ||
+	    prev_state == IRDMA_CM_STATE_CLOSING) {
+		irdma_rem_ref_cm_node(cm_node);
+		return;
+	}
+
+	/* Our FIN was never acknowledged: release the cm_id and reset. */
+	if (prev_state == IRDMA_CM_STATE_FIN_WAIT1 ||
+	    prev_state == IRDMA_CM_STATE_LAST_ACK) {
+		if (cm_node->cm_id)
+			cm_id->rem_ref(cm_id);
+		irdma_send_reset(cm_node);
+		return;
+	}
+
+	/* Any other state: reset the peer and report the abort upward. */
+	atomic_inc(&cm_node->ref_count);
+	irdma_send_reset(cm_node);
+	irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+}
+
+/**
+ * irdma_handle_close_entry - for handling retry/timeouts
+ * @cm_node: connection's node
+ * @rem_node: flag for remove cm_node
+ *
+ * Consumes cm_node->close_entry, if there is one. When the entry carries
+ * a QP (stored in its sqbuf field) that still has a cm_id, the QP is
+ * marked closed/errored and irdma_cm_disconn() is called. When the entry
+ * carries no QP and @rem_node is set, a cm_node reference is dropped
+ * instead. Finally the cm_id reference is released (if the node has a
+ * cm_id) and the entry is freed.
+ */
+static void irdma_handle_close_entry(struct irdma_cm_node *cm_node,
+ u32 rem_node)
+{
+ struct irdma_timer_entry *close_entry = cm_node->close_entry;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct irdma_qp *iwqp;
+ unsigned long flags;
+
+ if (!close_entry)
+ return;
+ iwqp = (struct irdma_qp *)close_entry->sqbuf; /* close entries reuse the sqbuf field to carry a QP pointer (may be NULL) */
+ if (iwqp) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->cm_id) {
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
+ iwqp->hw_iwarp_state = IRDMA_QP_STATE_ERROR;
+ iwqp->last_aeq = IRDMA_AE_RESET_SENT;
+ iwqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&iwqp->lock, flags); /* lock is released before irdma_cm_disconn() runs */
+ irdma_cm_disconn(iwqp);
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ } else if (rem_node) {
+ /* TIME_WAIT state */
+ irdma_rem_ref_cm_node(cm_node);
+ }
+ if (cm_id) /* cm_id was sampled on entry, before any of the work above */
+ cm_id->rem_ref(cm_id);
+ kfree(close_entry);
+ cm_node->close_entry = NULL; /* NOTE(review): cm_node is still written after the rem_ref above; assumes the caller holds its own reference */
+}
+
+/**
+ * irdma_build_timer_list - Add cm_nodes to timer list
+ * @timer_list: ptr to timer list
+ * @hte: ptr to accelerated or non-accelerated list
+ *
+ * Every node on @hte that has a close or send entry pending gets an extra
+ * reference and is linked onto @timer_list through its timer_entry member.
+ */
+static void irdma_build_timer_list(struct list_head *timer_list,
+				   struct list_head *hte)
+{
+	struct irdma_cm_node *node;
+
+	list_for_each_entry(node, hte, list) {
+		/* nothing scheduled on this node */
+		if (!node->close_entry && !node->send_entry)
+			continue;
+
+		atomic_inc(&node->ref_count);
+		list_add(&node->timer_entry, timer_list);
+	}
+}
+
+/**
+ * irdma_cm_timer_tick - system's timer expired callback
+ * @t: Pointer to timer_list
+ *
+ * Collects every cm_node with a pending close or send entry, handles the
+ * entries that are due (close handling, retransmission, or giving up when
+ * the retry budget is spent) and re-arms cm_core->tcp_timer for the
+ * earliest deadline that is still outstanding.
+ */
+static void irdma_cm_timer_tick(struct timer_list *t)
+{
+ unsigned long nexttimeout = jiffies + IRDMA_LONG_TIME; /* fallback; replaced by the first pending deadline found */
+ struct irdma_cm_node *cm_node;
+ struct irdma_timer_entry *send_entry, *close_entry;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer);
+ struct irdma_sc_vsi *vsi;
+ u32 settimer = 0; /* becomes 1 once any entry still needs the timer */
+ unsigned long timetosend;
+ struct irdma_sc_dev *dev;
+ unsigned long flags;
+ struct list_head timer_list;
+
+ INIT_LIST_HEAD(&timer_list);
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags); /* snapshot nodes with pending entries; each one gets a reference */
+ irdma_build_timer_list(&timer_list, &cm_core->non_accelerated_list);
+ irdma_build_timer_list(&timer_list, &cm_core->accelerated_list);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ list_for_each_safe(list_node, list_core_temp, &timer_list) {
+ cm_node = container_of(list_node,
+ struct irdma_cm_node,
+ timer_entry);
+ close_entry = cm_node->close_entry;
+
+ if (close_entry) {
+ if (time_after(close_entry->timetosend, jiffies)) { /* not due yet: only track its deadline */
+ if (nexttimeout > close_entry->timetosend || /* NOTE(review): plain '>' on jiffies values is not wrap-safe; time_after() may be intended */
+ !settimer) {
+ nexttimeout = close_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ irdma_handle_close_entry(cm_node, 1); /* due: consume the close entry */
+ }
+ }
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ goto done;
+ if (time_after(send_entry->timetosend, jiffies)) { /* send entry not due yet */
+ if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
+ if (nexttimeout > send_entry->timetosend ||
+ !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ irdma_free_retrans_entry(cm_node); /* offloaded nodes no longer need the retransmit entry */
+ }
+ goto done;
+ }
+
+ if (cm_node->state == IRDMA_CM_STATE_OFFLOADED ||
+ cm_node->state == IRDMA_CM_STATE_CLOSED) {
+ irdma_free_retrans_entry(cm_node);
+ goto done;
+ }
+
+ if (!send_entry->retranscount || !send_entry->retrycount) { /* retry budget exhausted: give up on this connection */
+ irdma_free_retrans_entry(cm_node);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock,
+ flags);
+ irdma_retrans_expired(cm_node); /* called with retrans_list_lock dropped */
+ cm_node->state = IRDMA_CM_STATE_CLOSED; /* irdma_retrans_expired() already stored this state */
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ goto done;
+ }
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); /* lock is not held across the transmit below */
+
+ vsi = &cm_node->iwdev->vsi;
+ dev = cm_node->dev; /* NOTE(review): dev is not read again in this function */
+ if (!cm_node->ack_rcvd) { /* retransmit only while no ack has been recorded */
+ atomic_inc(&send_entry->sqbuf->refcount);
+ irdma_puda_send_buf(vsi->ilq, send_entry->sqbuf);
+ cm_node->cm_core->stats_pkt_retrans++;
+ }
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ if (send_entry->send_retrans) {
+ send_entry->retranscount--;
+ timetosend = (IRDMA_RETRY_TIMEOUT << /* back-off: the shift grows as retranscount drops */
+ (IRDMA_DEFAULT_RETRANS -
+ send_entry->retranscount));
+
+ send_entry->timetosend = jiffies +
+ min(timetosend, IRDMA_MAX_TIMEOUT); /* delay is capped at IRDMA_MAX_TIMEOUT */
+ if (nexttimeout > send_entry->timetosend || !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ int close_when_complete;
+
+ close_when_complete = send_entry->close_when_complete; /* read before the entry is freed */
+ irdma_free_retrans_entry(cm_node);
+ if (close_when_complete)
+ irdma_rem_ref_cm_node(cm_node);
+ }
+done:
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ irdma_rem_ref_cm_node(cm_node); /* drops the reference taken by irdma_build_timer_list() */
+ }
+
+ if (settimer) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ if (!timer_pending(&cm_core->tcp_timer)) { /* re-arm for the earliest deadline unless it was armed meanwhile */
+ cm_core->tcp_timer.expires = nexttimeout;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
+}
+
+/**
+ * irdma_send_syn - send SYN packet
+ * @cm_node: connection's node
+ * @sendack: flag to set ACK bit or not
+ *
+ * Builds the MSS, window-scale and end-of-list TCP options, forms the
+ * frame through cm_core->form_cm_frame() and hands it to
+ * irdma_schedule_cm_timer() as a retransmittable SEND entry.
+ *
+ * Return: 0 on success, -EINVAL if @cm_node is NULL, -ENOMEM if the frame
+ * could not be formed, otherwise the error from irdma_schedule_cm_timer().
+ */
+int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack)
+{
+	struct irdma_puda_buf *sqbuf;
+	int flags = SET_SYN;
+	char optionsbuf[sizeof(struct option_mss) +
+			sizeof(struct option_windowscale) +
+			sizeof(struct option_base) + TCP_OPTIONS_PADDING];
+	struct irdma_kmem_info opts;
+	int optionssize = 0;
+	union all_known_options *options;
+
+	if (!cm_node)
+		return -EINVAL;
+
+	opts.addr = optionsbuf;
+
+	/* MSS option */
+	options = (union all_known_options *)&optionsbuf[optionssize];
+	options->mss.optionnum = OPTION_NUM_MSS;
+	options->mss.len = sizeof(struct option_mss);
+	options->mss.mss = htons(cm_node->tcp_cntxt.mss);
+	optionssize += sizeof(struct option_mss);
+
+	/* window scale option */
+	options = (union all_known_options *)&optionsbuf[optionssize];
+	options->windowscale.optionnum = OPTION_NUM_WINDOW_SCALE;
+	options->windowscale.len = sizeof(struct option_windowscale);
+	options->windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+	optionssize += sizeof(struct option_windowscale);
+
+	/* one-byte end-of-list marker */
+	options = (union all_known_options *)&optionsbuf[optionssize];
+	options->eol = OPTION_NUM_EOL;
+	optionssize += 1;
+
+	if (sendack)
+		flags |= SET_ACK;
+
+	opts.size = optionssize;
+
+	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, &opts, NULL, NULL,
+						flags);
+	/* Report a real errno; a bare -1 would read as -EPERM to callers. */
+	if (!sqbuf)
+		return -ENOMEM;
+
+	return irdma_schedule_cm_timer(cm_node, sqbuf,
+				       IRDMA_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * irdma_send_ack - Send ACK packet
+ * @cm_node: connection's node
+ *
+ * Forms a bare ACK frame and posts it on the ILQ. Nothing is sent, and no
+ * error is reported, when the frame cannot be formed.
+ */
+void irdma_send_ack(struct irdma_cm_node *cm_node)
+{
+	struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+	struct irdma_puda_buf *ack_buf;
+
+	ack_buf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+						  SET_ACK);
+	if (!ack_buf)
+		return;
+
+	irdma_puda_send_buf(vsi->ilq, ack_buf);
+}
+
+/**
+ * irdma_send_fin - Send FIN pkt
+ * @cm_node: connection's node
+ *
+ * Forms a FIN|ACK frame and queues it as a retransmittable SEND entry.
+ *
+ * Return: 0 on success, -ENOMEM if the frame could not be formed,
+ * otherwise the error from irdma_schedule_cm_timer().
+ */
+static int irdma_send_fin(struct irdma_cm_node *cm_node)
+{
+	struct irdma_puda_buf *sqbuf;
+
+	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+						SET_ACK | SET_FIN);
+	/* Report a real errno; a bare -1 would read as -EPERM to callers. */
+	if (!sqbuf)
+		return -ENOMEM;
+
+	return irdma_schedule_cm_timer(cm_node, sqbuf,
+				       IRDMA_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * irdma_find_node - find a cm node that matches the reference
+ * cm node
+ * @cm_core: cm's core
+ * @rem_port: remote tcp port num
+ * @rem_addr: remote ip addr
+ * @loc_port: local tcp port num
+ * @loc_addr: loc ip addr
+ * @add_refcnt: flag to increment refcount of cm_node
+ * @accelerated_list: flag for accelerated vs non-accelerated list to search
+ *
+ * The chosen list is walked under cm_core->ht_lock. Returns the first
+ * node whose local and remote address/port all match, or NULL.
+ */
+struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core,
+				      u16 rem_port,
+				      u32 *rem_addr,
+				      u16 loc_port,
+				      u32 *loc_addr,
+				      bool add_refcnt,
+				      bool accelerated_list)
+{
+	struct list_head *bucket = &cm_core->non_accelerated_list;
+	struct irdma_cm_node *cur;
+	struct irdma_cm_node *match = NULL;
+	unsigned long flags;
+
+	if (accelerated_list)
+		bucket = &cm_core->accelerated_list;
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	list_for_each_entry(cur, bucket, list) {
+		if (memcmp(cur->loc_addr, loc_addr, sizeof(cur->loc_addr)) ||
+		    cur->loc_port != loc_port ||
+		    memcmp(cur->rem_addr, rem_addr, sizeof(cur->rem_addr)) ||
+		    cur->rem_port != rem_port)
+			continue;
+
+		/* take the reference while the list lock is still held */
+		if (add_refcnt)
+			atomic_inc(&cur->ref_count);
+		match = cur;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	/* no owner node */
+	if (!match)
+		return NULL;
+
+	trace_irdma_find_node(match, 0, NULL);
+	return match;
+}
+
+/**
+ * irdma_find_listener - find a cm node listening on this addr-port pair
+ * @cm_core: cm's core
+ * @dst_addr: listener ip addr
+ * @dst_port: listener tcp port num
+ * @vlan_id: vlan id the listener must be bound to
+ * @listener_state: state to match with listen node's
+ *
+ * A listener bound to the all-zero address matches any @dst_addr. The
+ * returned listener has had its ref_count incremented.
+ */
+static struct irdma_cm_listener *irdma_find_listener(struct irdma_cm_core *cm_core,
+						     u32 *dst_addr,
+						     u16 dst_port,
+						     u16 vlan_id,
+						     enum irdma_cm_listener_state listener_state)
+{
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	struct irdma_cm_listener *cur;
+	struct irdma_cm_listener *match = NULL;
+	unsigned long flags;
+
+	/* walk list and find cm_node associated with this session ID */
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(cur, &cm_core->listen_nodes, list) {
+		/* exact address, or a wildcard (all-zero) listener */
+		if (memcmp(cur->loc_addr, dst_addr, sizeof(ip_zero)) &&
+		    memcmp(cur->loc_addr, ip_zero, sizeof(ip_zero)))
+			continue;
+		if (cur->loc_port != dst_port || vlan_id != cur->vlan_id)
+			continue;
+		if (!(listener_state & cur->listener_state))
+			continue;
+
+		atomic_inc(&cur->ref_count);
+		match = cur;
+		break;
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+	if (!match)
+		return NULL;
+
+	trace_irdma_find_listener(match);
+	return match;
+}
+
+/**
+ * irdma_add_hte_node - add a cm node to the hash table
+ * @cm_core: cm's core
+ * @cm_node: connection's node
+ *
+ * Appends @cm_node to the non-accelerated list under ht_lock. Does nothing
+ * when either argument is NULL.
+ */
+static void irdma_add_hte_node(struct irdma_cm_core *cm_core,
+			       struct irdma_cm_node *cm_node)
+{
+	unsigned long flags;
+
+	if (cm_node && cm_core) {
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		list_add_tail(&cm_node->list, &cm_core->non_accelerated_list);
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	}
+}
+
+/**
+ * irdma_find_port - find port that matches reference port
+ * @hte: ptr to accelerated or non-accelerated list
+ * @port: port number
+ *
+ * Takes no lock itself. Returns true when some node on @hte has @port as
+ * its local port.
+ */
+static bool irdma_find_port(struct list_head *hte, u16 port)
+{
+	struct irdma_cm_node *node;
+	bool found = false;
+
+	list_for_each_entry(node, hte, list) {
+		if (node->loc_port == port) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/**
+ * irdma_port_in_use - determine if port is in use
+ * @cm_core: cm's core
+ * @port: port number
+ *
+ * Checks the connection lists first (under ht_lock) and then the listener
+ * list (under listen_list_lock); the two locks are never held together.
+ */
+bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port)
+{
+	struct irdma_cm_listener *lsn;
+	unsigned long flags;
+	bool in_use;
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	in_use = irdma_find_port(&cm_core->accelerated_list, port) ||
+		 irdma_find_port(&cm_core->non_accelerated_list, port);
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	if (in_use)
+		return true;
+
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(lsn, &cm_core->listen_nodes, list) {
+		if (lsn->loc_port == port) {
+			in_use = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+	return in_use;
+}
+
+/**
+ * irdma_del_multiple_qhash - Remove qhash and child listens
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Walks the parent's child_listen_list under listen_list_lock, deleting
+ * the qhash of each child that has one and freeing the child. @cm_info is
+ * overwritten with each child's address and vlan id along the way.
+ *
+ * Returns the status for the last child processed (0 when that child had
+ * no qhash set), or IRDMA_ERR_CFG when the child list was empty.
+ */
+static enum irdma_status_code
+irdma_del_multiple_qhash(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct irdma_cm_listener *child_listen_node;
+ enum irdma_status_code ret = IRDMA_ERR_CFG;
+ struct list_head *pos, *tpos;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_for_each_safe(pos, tpos,
+ &cm_parent_listen_node->child_listen_list) {
+ child_listen_node = list_entry(pos, struct irdma_cm_listener,
+ child_listen_list);
+ if (child_listen_node->ipv4)
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_CM,
+ "removing child listen for IP=%pI4, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ else
+ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
+ "removing child listen for IP=%pI6, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ trace_irdma_del_multiple_qhash(child_listen_node);
+ list_del(pos); /* unlink before the child is freed below */
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr, /* reuse the caller's cm_info to describe this child */
+ sizeof(cm_info->loc_addr));
+ cm_info->vlan_id = child_listen_node->vlan_id;
+ if (child_listen_node->qhash_set) {
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE,
+ NULL, false);
+ child_listen_node->qhash_set = false;
+ } else {
+ ret = 0;
+ }
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_CM,
+ "Child listen node freed = %p\n",
+ child_listen_node);
+ kfree(child_listen_node); /* freed whether or not the qhash delete succeeded */
+ cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
+ }
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+
+ return ret;
+}
+
+/**
+ * irdma_netdev_vlan_ipv6 - Gets the netdev and mac
+ * @addr: local IPv6 address
+ * @vlan_id: vlan id for the given IPv6 address
+ * @mac: mac address for the given IPv6 address
+ *
+ * Returns the net_device of the IPv6 address and also sets the
+ * vlan id and mac for that address. Returns NULL when IPv6 is not
+ * enabled or when no device owns the address; in the latter case
+ * @vlan_id is left at IRDMA_NO_VLAN and @mac is zeroed.
+ *
+ * The device is looked up under rcu_read_lock() only; no reference is
+ * taken on the returned net_device.
+ */
+struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
+{
+	struct net_device *ip_dev;
+	struct net_device *match = NULL;
+	struct in6_addr laddr6;
+
+	if (!IS_ENABLED(CONFIG_IPV6))
+		return NULL;
+
+	irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
+	if (vlan_id)
+		*vlan_id = IRDMA_NO_VLAN;
+	if (mac)
+		eth_zero_addr(mac);
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, ip_dev) {
+		if (!ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1))
+			continue;
+
+		if (vlan_id)
+			*vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+		if (ip_dev->dev_addr && mac)
+			ether_addr_copy(mac, ip_dev->dev_addr);
+		match = ip_dev;
+		break;
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Return the recorded match, not the iterator: when the walk ends
+	 * without a hit the cursor points at the container of the list
+	 * head, which is not a net_device and is not NULL.
+	 */
+	return match;
+}
+
+/**
+ * irdma_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
+ * @addr: local IPv4 address
+ *
+ * Only addr[0] is used. Returns IRDMA_NO_VLAN when no device owns the
+ * address.
+ */
+u16 irdma_get_vlan_ipv4(u32 *addr)
+{
+	struct net_device *owner;
+	u16 vid;
+
+	owner = ip_dev_find(&init_net, htonl(addr[0]));
+	if (!owner)
+		return IRDMA_NO_VLAN;
+
+	vid = rdma_vlan_dev_vlan_id(owner);
+	dev_put(owner);
+
+	return vid;
+}
+
+/**
+ * irdma_add_mqh_6 - Adds multiple qhashes for IPv6
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv6 address
+ * on the adapter and adds the associated qhash filter
+ *
+ * @cm_info (loc_addr, vlan_id) and the parent's vlan_id are overwritten
+ * for each address visited. Returns IRDMA_ERR_NO_MEMORY if a child could
+ * not be allocated; otherwise the status of the last qhash add attempted
+ * (0 if none was attempted).
+ */
+static enum irdma_status_code
+irdma_add_mqh_6(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct net_device *ip_dev;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifp, *tmp;
+ enum irdma_status_code ret = 0;
+ struct irdma_cm_listener *child_listen_node;
+ unsigned long flags;
+
+ rtnl_lock(); /* held across the whole netdev and address walk */
+ for_each_netdev(&init_net, ip_dev) {
+ if (!(ip_dev->flags & IFF_UP))
+ continue;
+
+ if (((rdma_vlan_dev_vlan_id(ip_dev) >= IRDMA_NO_VLAN) || /* keep only iwdev->netdev itself, or a VLAN device whose real device is iwdev->netdev */
+ (rdma_vlan_dev_real_dev(ip_dev) != iwdev->netdev)) &&
+ ip_dev != iwdev->netdev)
+ continue;
+
+ idev = __in6_dev_get(ip_dev);
+ if (!idev) {
+ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
+ "idev == NULL\n");
+ break; /* NOTE(review): this ends the whole netdev walk, not just this device; confirm 'continue' was not intended */
+ }
+ list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_CM,
+ "IP=%pI6, vlan_id=%d, MAC=%pM\n",
+ &ifp->addr,
+ rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_CM,
+ "Allocating child listener %p\n",
+ child_listen_node);
+ if (!child_listen_node) {
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_ERR,
+ "listener memory allocation\n");
+ ret = IRDMA_ERR_NO_MEMORY;
+ goto exit;
+ }
+
+ cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+ memcpy(child_listen_node, cm_parent_listen_node, /* child starts as a byte copy of the parent, then gets its own address */
+ sizeof(*child_listen_node));
+ irdma_copy_ip_ntohl(child_listen_node->loc_addr,
+ ifp->addr.in6_u.u6_addr32);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (ret) {
+ kfree(child_listen_node);
+ continue; /* this address is skipped; ret keeps the failure unless a later add overwrites it */
+ }
+
+ trace_irdma_add_mqh_6(iwdev, child_listen_node,
+ ip_dev->dev_addr);
+
+ child_listen_node->qhash_set = true;
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_add(&child_listen_node->child_listen_list,
+ &cm_parent_listen_node->child_listen_list);
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created++; /* counted only once the child is on the list */
+ }
+ }
+exit:
+ rtnl_unlock();
+
+ return ret;
+}
+
+/**
+ * irdma_add_mqh_4 - Adds multiple qhashes for IPv4
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv4 address
+ * on the adapter and adds the associated qhash filter
+ *
+ * @cm_info (loc_addr, vlan_id) and the parent's vlan_id are overwritten
+ * for each address visited. Devices without IPv4 configuration are
+ * skipped. Returns IRDMA_ERR_NO_MEMORY if a child could not be allocated;
+ * otherwise the status of the last qhash add attempted (0 if none was
+ * attempted).
+ */
+static enum irdma_status_code
+irdma_add_mqh_4(struct irdma_device *iwdev,
+		struct irdma_cm_info *cm_info,
+		struct irdma_cm_listener *cm_parent_listen_node)
+{
+	struct net_device *dev;
+	struct in_device *idev;
+	struct irdma_cm_listener *child_listen_node;
+	enum irdma_status_code ret = 0;
+	unsigned long flags;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		if (!(dev->flags & IFF_UP))
+			continue;
+
+		/*
+		 * Keep only iwdev->netdev itself, or a VLAN device whose
+		 * real device is iwdev->netdev.
+		 */
+		if (((rdma_vlan_dev_vlan_id(dev) >= IRDMA_NO_VLAN) ||
+		     (rdma_vlan_dev_real_dev(dev) != iwdev->netdev)) &&
+		    dev != iwdev->netdev)
+			continue;
+
+		idev = in_dev_get(dev);
+		/* for_ifa() dereferences idev, so skip devices without one */
+		if (!idev)
+			continue;
+
+		for_ifa(idev) {
+			irdma_debug(&iwdev->rf->sc_dev,
+				    IRDMA_DEBUG_CM,
+				    "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
+				    &ifa->ifa_address,
+				    rdma_vlan_dev_vlan_id(dev),
+				    dev->dev_addr);
+			child_listen_node = kzalloc(sizeof(*child_listen_node),
+						    GFP_KERNEL);
+			irdma_debug(&iwdev->rf->sc_dev,
+				    IRDMA_DEBUG_CM,
+				    "Allocating child listener %p\n",
+				    child_listen_node);
+			if (!child_listen_node) {
+				irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+					    "listener memory allocation\n");
+				in_dev_put(idev);
+				ret = IRDMA_ERR_NO_MEMORY;
+				goto exit;
+			}
+
+			cm_info->vlan_id = rdma_vlan_dev_vlan_id(dev);
+			cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+			/* child starts as a copy of the parent */
+			memcpy(child_listen_node, cm_parent_listen_node,
+			       sizeof(*child_listen_node));
+			child_listen_node->loc_addr[0] = ntohl(ifa->ifa_address);
+			memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+			       sizeof(cm_info->loc_addr));
+			ret = irdma_manage_qhash(iwdev,
+						 cm_info,
+						 IRDMA_QHASH_TYPE_TCP_SYN,
+						 IRDMA_QHASH_MANAGE_TYPE_ADD,
+						 NULL,
+						 true);
+			if (ret) {
+				kfree(child_listen_node);
+				continue;
+			}
+
+			trace_irdma_add_mqh_4(iwdev, child_listen_node,
+					      dev->dev_addr);
+
+			child_listen_node->qhash_set = true;
+			spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+			list_add(&child_listen_node->child_listen_list,
+				 &cm_parent_listen_node->child_listen_list);
+			spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+			/*
+			 * Count the node only once it is really on the list,
+			 * so a failed allocation cannot inflate the counter.
+			 */
+			cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+		}
+		endfor_ifa(idev);
+		in_dev_put(idev);
+	}
+exit:
+	rtnl_unlock();
+
+	return ret;
+}
+
+/**
+ * irdma_dec_refcnt_listen - delete listener and associated cm nodes
+ * @cm_core: cm's core
+ * @listener: listener whose reference is being dropped
+ * @free_hanging_nodes: to free associated cm_nodes
+ * @apbvt_del: flag to delete the apbvt
+ *
+ * When @free_hanging_nodes is set, every non-accelerated cm_node that
+ * belongs to @listener is reset first. The listener reference is then
+ * dropped; on the last reference the listener is unlinked, its apbvt
+ * entry (if @apbvt_del) and qhash filters are removed, and it is freed.
+ *
+ * Return: 0 if the listener was freed, -EINVAL if references remain.
+ */
+static int irdma_dec_refcnt_listen(struct irdma_cm_core *cm_core,
+				   struct irdma_cm_listener *listener,
+				   int free_hanging_nodes, bool apbvt_del)
+{
+	int err;
+	struct list_head *list_pos;
+	struct list_head *list_temp;
+	struct irdma_cm_node *cm_node;
+	struct list_head reset_list;
+	/* zeroed so the qhash helpers never see stack garbage */
+	struct irdma_cm_info nfo = {};
+	enum irdma_cm_node_state old_state;
+	unsigned long flags;
+
+	trace_irdma_dec_refcnt_listen(listener, __builtin_return_address(0));
+	/* free non-accelerated child nodes for this listener */
+	INIT_LIST_HEAD(&reset_list);
+	if (free_hanging_nodes) {
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		list_for_each_safe(list_pos, list_temp,
+				   &cm_core->non_accelerated_list) {
+			cm_node = container_of(list_pos, struct irdma_cm_node,
+					       list);
+			if (cm_node->listener == listener &&
+			    !cm_node->accelerated) {
+				/* hold the node while it is on reset_list */
+				atomic_inc(&cm_node->ref_count);
+				list_add(&cm_node->reset_entry, &reset_list);
+			}
+		}
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	}
+
+	list_for_each_safe(list_pos, list_temp, &reset_list) {
+		cm_node = container_of(list_pos, struct irdma_cm_node,
+				       reset_entry);
+		/* already closing: just drop the reference taken above */
+		if (cm_node->state >= IRDMA_CM_STATE_FIN_WAIT1) {
+			irdma_rem_ref_cm_node(cm_node);
+			continue;
+		}
+
+		irdma_cleanup_retrans_entry(cm_node);
+		err = irdma_send_reset(cm_node);
+		if (err) {
+			cm_node->state = IRDMA_CM_STATE_CLOSED;
+			irdma_debug(cm_node->dev,
+				    IRDMA_DEBUG_ERR,
+				    "send reset failed\n");
+		} else {
+			old_state = cm_node->state;
+			cm_node->state = IRDMA_CM_STATE_LISTENER_DESTROYED;
+			if (old_state != IRDMA_CM_STATE_MPAREQ_RCVD)
+				irdma_rem_ref_cm_node(cm_node);
+		}
+	}
+
+	/* other holders remain: the listener stays alive */
+	if (atomic_dec_return(&listener->ref_count))
+		return -EINVAL;
+
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_del(&listener->list);
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+	if (listener->iwdev) {
+		if (apbvt_del)
+			irdma_manage_apbvt(listener->iwdev,
+					   listener->loc_port,
+					   IRDMA_MANAGE_APBVT_DEL);
+		memcpy(nfo.loc_addr, listener->loc_addr,
+		       sizeof(nfo.loc_addr));
+		nfo.loc_port = listener->loc_port;
+		nfo.ipv4 = listener->ipv4;
+		nfo.vlan_id = listener->vlan_id;
+		nfo.user_pri = listener->user_pri;
+		nfo.qh_qpid = listener->iwdev->vsi.ilq->qp_id;
+
+		if (!list_empty(&listener->child_listen_list)) {
+			irdma_del_multiple_qhash(listener->iwdev, &nfo,
+						 listener);
+		} else {
+			if (listener->qhash_set)
+				irdma_manage_qhash(listener->iwdev,
+						   &nfo,
+						   IRDMA_QHASH_TYPE_TCP_SYN,
+						   IRDMA_QHASH_MANAGE_TYPE_DELETE,
+						   NULL,
+						   false);
+		}
+	}
+
+	cm_core->stats_listen_destroyed++;
+	kfree(listener);
+	cm_core->stats_listen_nodes_destroyed++;
+
+	return 0;
+}
+
+/**
+ * irdma_cm_del_listen - delete a listener
+ * @cm_core: cm's core
+ * @listener: passive connection's listener
+ * @apbvt_del: flag to delete apbvt
+ *
+ * Puts the listener in the passive state, clears its cm_id and drops a
+ * reference, asking for the listener's pending cm_nodes to be freed.
+ * Returns whatever irdma_dec_refcnt_listen() returns.
+ */
+static int irdma_cm_del_listen(struct irdma_cm_core *cm_core,
+			       struct irdma_cm_listener *listener,
+			       bool apbvt_del)
+{
+	int rc;
+
+	listener->listener_state = IRDMA_CM_LISTENER_PASSIVE_STATE;
+	listener->cm_id = NULL;
+
+	rc = irdma_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
+
+	return rc;
+}
+
+/**
+ * irdma_addr_resolve_neigh - resolve neighbor address
+ * @iwdev: iwarp device structure
+ * @src_ip: local ip address
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ *
+ * Returns -EINVAL when no route exists, the result of irdma_add_arp()
+ * when the neighbour entry is valid, and @arpindex unchanged otherwise
+ * (after kicking neighbour resolution if an entry was found).
+ */
+static int irdma_addr_resolve_neigh(struct irdma_device *iwdev,
+				    u32 src_ip,
+				    u32 dst_ip,
+				    int arpindex)
+{
+	__be32 dst_ipaddr = htonl(dst_ip);
+	__be32 src_ipaddr = htonl(src_ip);
+	struct neighbour *nbr;
+	struct rtable *route;
+	int rc = arpindex;
+
+	route = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
+	if (IS_ERR(route)) {
+		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "ip_route_output fail\n");
+		return -EINVAL;
+	}
+
+	nbr = dst_neigh_lookup(&route->dst, &dst_ipaddr);
+
+	rcu_read_lock();
+	if (nbr) {
+		if (nbr->nud_state & NUD_VALID)
+			rc = irdma_add_arp(iwdev->rf, &dst_ip, true, nbr->ha);
+		else
+			neigh_event_send(nbr, NULL);
+	}
+	rcu_read_unlock();
+
+	if (nbr)
+		neigh_release(nbr);
+
+	ip_rt_put(route);
+
+	return rc;
+}
+
+/**
+ * irdma_get_dst_ipv6 - get destination cache entry via ipv6 lookup
+ * @src_addr: local ipv6 sock address
+ * @dst_addr: destination ipv6 sock address
+ *
+ * Returns NULL when IPv6 is not enabled, otherwise whatever
+ * ip6_route_output() returns for the flow.
+ */
+static struct dst_entry *irdma_get_dst_ipv6(struct sockaddr_in6 *src_addr,
+					    struct sockaddr_in6 *dst_addr)
+{
+	struct flowi6 flow = {};
+
+	if (!IS_ENABLED(CONFIG_IPV6))
+		return NULL;
+
+	flow.daddr = dst_addr->sin6_addr;
+	flow.saddr = src_addr->sin6_addr;
+	/* link-local destinations need the outgoing interface */
+	if (ipv6_addr_type(&flow.daddr) & IPV6_ADDR_LINKLOCAL)
+		flow.flowi6_oif = dst_addr->sin6_scope_id;
+
+	return ip6_route_output(&init_net, NULL, &flow);
+}
+
+/**
+ * irdma_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
+ * @iwdev: iwarp device structure
+ * @src: local ip address
+ * @dest: remote ip address
+ * @arpindex: if there is an arp entry
+ *
+ * Returns -EINVAL when no usable route exists, the result of
+ * irdma_add_arp() when the neighbour entry is valid, and @arpindex
+ * unchanged otherwise (after kicking neighbour resolution if an entry was
+ * found).
+ */
+static int irdma_addr_resolve_neigh_ipv6(struct irdma_device *iwdev,
+					 u32 *src,
+					 u32 *dest,
+					 int arpindex)
+{
+	struct neighbour *neigh;
+	int rc = arpindex;
+	struct dst_entry *dst;
+	struct sockaddr_in6 dst_addr = {};
+	struct sockaddr_in6 src_addr = {};
+
+	dst_addr.sin6_family = AF_INET6;
+	irdma_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
+	src_addr.sin6_family = AF_INET6;
+	irdma_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
+	dst = irdma_get_dst_ipv6(&src_addr, &dst_addr);
+	if (!dst || dst->error) {
+		if (dst) {
+			/* read dst->error before the reference is dropped */
+			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+				    "ip6_route_output returned dst->error = %d\n",
+				    dst->error);
+			dst_release(dst);
+		}
+		return -EINVAL;
+	}
+
+	neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
+
+	rcu_read_lock();
+	if (!neigh)
+		goto exit;
+
+	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
+		    "dst_neigh_lookup MAC=%pM\n", neigh->ha);
+
+	trace_irdma_addr_resolve(iwdev, neigh->ha);
+
+	if (neigh->nud_state & NUD_VALID)
+		rc = irdma_add_arp(iwdev->rf, dest, false, neigh->ha);
+	else
+		neigh_event_send(neigh, NULL);
+exit:
+	rcu_read_unlock();
+	if (neigh)
+		neigh_release(neigh);
+	dst_release(dst);
+
+	return rc;
+}
+
+/**
+ * irdma_ipv4_is_lpb - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ *
+ * True when @rem_addr is an IPv4 loopback address or equals @loc_addr.
+ */
+bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr)
+{
+	if (ipv4_is_loopback(htonl(rem_addr)))
+		return true;
+
+	return loc_addr == rem_addr;
+}
+
+/**
+ * irdma_ipv6_is_lpb - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ *
+ * True when the two 16-byte addresses are identical or @rem_addr is the
+ * IPv6 loopback address.
+ */
+bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr)
+{
+	struct in6_addr remote6;
+
+	irdma_copy_ip_htonl(remote6.in6_u.u6_addr32, rem_addr);
+
+	if (!memcmp(loc_addr, rem_addr, 16))
+		return true;
+
+	return ipv6_addr_loopback(&remote6);
+}
+
+/**
+ * irdma_cm_create_ah - create a cm address handle
+ * @cm_node: The connection manager node to create AH for
+ * @wait: Provides option to wait for ah creation or not
+ *
+ * Fills an irdma_ah_info from the node's addresses, TOS and vlan and asks
+ * the PUDA layer to create the handle, stored in cm_node->ah. Returns 0 on
+ * success and -ENOMEM when irdma_puda_create_ah() fails.
+ */
+static int irdma_cm_create_ah(struct irdma_cm_node *cm_node, bool wait)
+{
+	struct irdma_device *iwdev = cm_node->iwdev;
+	struct irdma_ah_info info = {};
+
+	ether_addr_copy(info.mac_addr, iwdev->netdev->dev_addr);
+
+	info.hop_ttl = 0x40;
+	info.tc_tos = cm_node->tos;
+	info.vsi = &iwdev->vsi;
+
+	if (!cm_node->ipv4) {
+		memcpy(info.dest_ip_addr, cm_node->rem_addr,
+		       sizeof(info.dest_ip_addr));
+		memcpy(info.src_ip_addr, cm_node->loc_addr,
+		       sizeof(info.src_ip_addr));
+		info.do_lpbk = irdma_ipv6_is_lpb(info.src_ip_addr,
+						 info.dest_ip_addr);
+	} else {
+		info.ipv4_valid = true;
+		info.dest_ip_addr[0] = cm_node->rem_addr[0];
+		info.src_ip_addr[0] = cm_node->loc_addr[0];
+		info.do_lpbk = irdma_ipv4_is_lpb(info.src_ip_addr[0],
+						 info.dest_ip_addr[0]);
+	}
+
+	info.vlan_tag = cm_node->vlan_id;
+	/* only ids within VLAN_VID_MASK get a tag inserted */
+	if (cm_node->vlan_id <= VLAN_VID_MASK)
+		info.insert_vlan_tag = 1;
+
+	info.dst_arpindex = irdma_arp_table(iwdev->rf, info.dest_ip_addr,
+					    info.ipv4_valid, NULL,
+					    IRDMA_ARP_RESOLVE);
+
+	if (irdma_puda_create_ah(&iwdev->rf->sc_dev, &info, wait,
+				 IRDMA_PUDA_RSRC_TYPE_ILQ, cm_node,
+				 &cm_node->ah))
+		return -ENOMEM;
+
+	trace_irdma_create_ah(cm_node);
+	return 0;
+}
+
+/**
+ * irdma_cm_free_ah - free a cm address handle
+ * @cm_node: The connection manager node whose AH is freed
+ *
+ * Hands cm_node->ah back to the PUDA layer and clears the pointer.
+ */
+static void irdma_cm_free_ah(struct irdma_cm_node *cm_node)
+{
+	struct irdma_sc_dev *sc_dev = &cm_node->iwdev->rf->sc_dev;
+
+	trace_irdma_cm_free_ah(cm_node);
+	irdma_puda_free_ah(sc_dev, cm_node->ah);
+	cm_node->ah = NULL;
+}
+
+/**
+ * irdma_make_cm_node - create a new instance of a cm node
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ * @listener: passive connection's listener
+ *
+ * Allocates a cm_node, fills it from @cm_info, @iwdev and (if given)
+ * @listener, resolves the remote MAC through the ARP table and adds the
+ * node to the non-accelerated list with ref_count set to 1.
+ *
+ * Returns the new node, or NULL when the allocation fails or no ARP
+ * index could be obtained for the remote address.
+ */
+static struct irdma_cm_node *irdma_make_cm_node(struct irdma_cm_core *cm_core,
+ struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *listener)
+{
+ struct irdma_cm_node *cm_node;
+ int oldarpindex;
+ int arpindex;
+ struct net_device *netdev = iwdev->netdev;
+
+ /* create an hte and cm_node for this instance */
+ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+ if (!cm_node)
+ return NULL;
+
+ /* set our node specific transport info */
+ cm_node->ipv4 = cm_info->ipv4;
+ cm_node->vlan_id = cm_info->vlan_id;
+ if (cm_node->vlan_id == IRDMA_NO_VLAN && iwdev->dcb) /* with DCB set, an untagged connection is given vlan id 0 */
+ cm_node->vlan_id = 0;
+ cm_node->tos = cm_info->tos;
+ cm_node->user_pri = cm_info->user_pri;
+ if (listener) { /* passive side: TOS and priority are re-derived from listener and peer */
+ if (listener->tos != cm_info->tos)
+ irdma_dev_warn(&iwdev->rf->sc_dev,
+ "application TOS[%d] and remote client TOS[%d] mismatch\n",
+ listener->tos, cm_info->tos);
+ cm_node->tos = max(listener->tos, cm_info->tos);
+ cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB,
+ "listener: TOS:[%d] UP:[%d]\n",
+ cm_node->tos, cm_node->user_pri);
+ trace_irdma_listener_tos(iwdev, cm_node->tos,
+ cm_node->user_pri);
+ }
+ memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
+ memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
+ cm_node->loc_port = cm_info->loc_port;
+ cm_node->rem_port = cm_info->rem_port;
+
+ cm_node->mpa_frame_rev = IRDMA_CM_DEFAULT_MPA_VER;
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ cm_node->iwdev = iwdev;
+ cm_node->dev = &iwdev->rf->sc_dev;
+
+ cm_node->ird_size = cm_node->dev->hw_attrs.max_hw_ird; /* IRD/ORD start at the hardware maximums */
+ cm_node->ord_size = cm_node->dev->hw_attrs.max_hw_ord;
+
+ cm_node->listener = listener;
+ cm_node->cm_id = cm_info->cm_id;
+ ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
+ spin_lock_init(&cm_node->retrans_list_lock);
+ cm_node->ack_rcvd = false;
+
+ atomic_set(&cm_node->ref_count, 1); /* the creator's reference */
+ /* associate our parent CM core */
+ cm_node->cm_core = cm_core;
+ cm_node->tcp_cntxt.loc_id = IRDMA_CM_DEFAULT_LOCAL_ID;
+ cm_node->tcp_cntxt.rcv_wscale = iwdev->rcv_wscale;
+ cm_node->tcp_cntxt.rcv_wnd = iwdev->rcv_wnd >> cm_node->tcp_cntxt.rcv_wscale; /* window is stored already scaled down */
+ if (cm_node->ipv4) {
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
+ htonl(cm_node->rem_addr[0]),
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV4;
+ } else if (IS_ENABLED(CONFIG_IPV6)) {
+ __be32 loc[4] = { /* addresses are kept in host order on the node; convert for the sequence generator */
+ htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]),
+ htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3])
+ };
+ __be32 rem[4] = {
+ htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]),
+ htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3])
+ };
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem,
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV6;
+ }
+
+ if ((cm_node->ipv4 &&
+ irdma_ipv4_is_lpb(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+ (!cm_node->ipv4 && irdma_ipv6_is_lpb(cm_node->loc_addr, cm_node->rem_addr))) {
+ arpindex = irdma_arp_table(iwdev->rf, /* loopback: use the existing ARP table entry, no neighbour lookup */
+ cm_node->rem_addr,
+ cm_node->ipv4,
+ NULL,
+ IRDMA_ARP_RESOLVE);
+ } else {
+ oldarpindex = irdma_arp_table(iwdev->rf, /* current entry, returned unchanged if the neighbour lookup adds nothing */
+ cm_node->rem_addr,
+ cm_node->ipv4,
+ NULL,
+ IRDMA_ARP_RESOLVE);
+ if (cm_node->ipv4)
+ arpindex = irdma_addr_resolve_neigh(iwdev,
+ cm_info->loc_addr[0],
+ cm_info->rem_addr[0],
+ oldarpindex);
+ else if (IS_ENABLED(CONFIG_IPV6))
+ arpindex = irdma_addr_resolve_neigh_ipv6(iwdev,
+ cm_info->loc_addr,
+ cm_info->rem_addr,
+ oldarpindex);
+ else
+ arpindex = -EINVAL;
+ }
+
+ if (arpindex < 0) /* no MAC known for the peer: the node cannot be used */
+ goto err;
+
+ ether_addr_copy(cm_node->rem_mac,
+ iwdev->rf->arp_table[arpindex].mac_addr);
+ irdma_add_hte_node(cm_core, cm_node); /* node becomes visible on the non-accelerated list here */
+ cm_core->stats_nodes_created++;
+ return cm_node;
+
+err:
+ kfree(cm_node);
+
+ return NULL;
+}
+
+/**
+ * irdma_rem_ref_cm_node - drop a reference on a cm node; destroy it on the last put
+ * @cm_node: connection's node
+ *
+ * Decrements @cm_node->ref_count while holding cm_core->ht_lock. If the
+ * count is still non-zero the function returns. Otherwise the node is
+ * unlinked from its list and torn down: pending-accept accounting, close
+ * entry, listener reference or apbvt/qhash entries, QP reference and
+ * address handle are released, then the node is freed with kfree().
+ */
+static void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
+{
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ struct irdma_qp *iwqp;
+ struct irdma_cm_info nfo;
+ unsigned long flags;
+
+ trace_irdma_rem_ref_cm_node(cm_node, 0, __builtin_return_address(0));
+ /* the decrement and the list removal happen under the same lock */
+ spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
+ if (atomic_dec_return(&cm_node->ref_count)) {
+ spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+ return;
+ }
+
+ list_del(&cm_node->list);
+ spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+
+ /* if the node is destroyed before connection was accelerated */
+ if (!cm_node->accelerated && cm_node->accept_pend) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+ "node destroyed before established\n");
+ /* NOTE(review): dereferences listener without a NULL check; assumes accept_pend is only set on nodes that have a listener - confirm */
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ }
+ if (cm_node->close_entry)
+ irdma_handle_close_entry(cm_node, 0);
+ if (cm_node->listener) {
+ /* node was created from a listener: give back the listener reference */
+ irdma_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
+ } else {
+ /* no listener: remove the apbvt and qhash entries this node set */
+ if (cm_node->apbvt_set) {
+ irdma_manage_apbvt(cm_node->iwdev,
+ cm_node->loc_port,
+ IRDMA_MANAGE_APBVT_DEL);
+ cm_node->apbvt_set = 0;
+ }
+ irdma_get_addr_info(cm_node, &nfo);
+ if (cm_node->qhash_set) {
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev,
+ &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+ cm_node->qhash_set = 0;
+ }
+ }
+
+ iwqp = cm_node->iwqp;
+ if (iwqp) {
+ /* break the QP <-> node link in both directions and drop the QP reference */
+ iwqp->cm_node = NULL;
+ irdma_rem_ref(&iwqp->ibqp);
+ cm_node->iwqp = NULL;
+ } else if (cm_node->qhash_set) {
+ /*
+ * The no-listener branch above already cleared qhash_set, so
+ * this is only reached for nodes that have a listener and no QP.
+ */
+ irdma_get_addr_info(cm_node, &nfo);
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev,
+ &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
+ cm_node->qhash_set = 0;
+ }
+ cm_core->cm_free_ah(cm_node);
+ cm_node->cm_core->stats_nodes_destroyed++;
+ kfree(cm_node);
+}
+
+/**
+ * irdma_handle_fin_pkt - FIN packet received
+ * @cm_node: connection's node
+ *
+ * Advances the node's connection state according to the state it was in
+ * when the FIN arrived. In every handled state rcv_nxt is incremented and
+ * the retransmit entry is cleaned up; the reply sent (FIN, ACK, reset or
+ * nothing) and the next state depend on the current state. States not
+ * listed are only logged.
+ */
+static void irdma_handle_fin_pkt(struct irdma_cm_node *cm_node)
+{
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_MPAREJ_RCVD:
+ /* answer the peer's FIN with our own FIN and wait for its ACK */
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_LAST_ACK;
+ irdma_send_fin(cm_node);
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ /* FIN while waiting for the MPA reply: report an abort and reset */
+ irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ /* NOTE(review): extra reference presumably consumed on the reset send path - confirm */
+ atomic_inc(&cm_node->ref_count);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSING;
+ irdma_send_ack(cm_node);
+ /*
+ * Wait for ACK as this is simultaneous close.
+ * After we receive ACK, do not send anything.
+ * Just rm the node.
+ */
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ /* ACK the FIN and arm a close timer for the TIME_WAIT period */
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_TIME_WAIT;
+ irdma_send_ack(cm_node);
+ irdma_schedule_cm_timer(cm_node, NULL, IRDMA_TIMER_TYPE_CLOSE, 1, 0);
+ break;
+ case IRDMA_CM_STATE_TIME_WAIT:
+ /* nothing is sent; the node reference is dropped */
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ default:
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "bad state node state = %d\n", cm_node->state);
+ break;
+ }
+}
+
+/**
+ * irdma_handle_rst_pkt - process received RST packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer (not referenced by this function's body)
+ *
+ * Cleans up the retransmit entry, then reacts to the reset according to
+ * the node's current state.
+ */
+static void irdma_handle_rst_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ irdma_cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ /* active side: retry once with MPA v1 if v2 was reset, else fail */
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V2:
+ /* Drop down to MPA_V1*/
+ cm_node->mpa_frame_rev = IETF_MPA_V1;
+ /* send a syn and goto syn sent state */
+ cm_node->state = IRDMA_CM_STATE_SYN_SENT;
+ if (irdma_send_syn(cm_node, 0))
+ irdma_active_open_err(cm_node, false);
+ break;
+ case IETF_MPA_V1:
+ default:
+ irdma_active_open_err(cm_node, false);
+ break;
+ }
+ break;
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ /* only bumps passive_state; the returned value is not used here */
+ atomic_add_return(1, &cm_node->passive_state);
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_passive_open_err(cm_node, false);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ irdma_active_open_err(cm_node, false);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_LAST_ACK:
+ /* drop the cm_id reference, then share the TIME_WAIT teardown below */
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ /* fall through */
+ case IRDMA_CM_STATE_TIME_WAIT:
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_rcv_mpa - Process a recv'd mpa buffer
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ *
+ * Parses the MPA frame carried in @rbuf. A parse failure is reported as
+ * an active open error when the node is in MPAREQ_SENT and as a passive
+ * open error otherwise. On success the node state is advanced, the frame
+ * is ACKed (for the two handled states) and a CM event is created.
+ */
+static void irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node,
+				 struct irdma_puda_buf *rbuf)
+{
+	enum irdma_cm_event_type event = IRDMA_CM_EVENT_UNKNOWN;
+	int len = rbuf->datalen;
+	u32 mpa_type;
+
+	if (irdma_parse_mpa(cm_node, rbuf->data, &mpa_type, len)) {
+		if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT)
+			irdma_passive_open_err(cm_node, true);
+		else
+			irdma_active_open_err(cm_node, true);
+		return;
+	}
+
+	if (cm_node->state == IRDMA_CM_STATE_ESTABLISHED) {
+		/* passive side: an MPA request arrived */
+		if (mpa_type == IRDMA_MPA_REQUEST_REJECT)
+			irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+				    "state for reject\n");
+		cm_node->state = IRDMA_CM_STATE_MPAREQ_RCVD;
+		event = IRDMA_CM_EVENT_MPA_REQ;
+		/* ACK received MPA request */
+		irdma_send_ack(cm_node);
+		atomic_set(&cm_node->passive_state,
+			   IRDMA_PASSIVE_STATE_INDICATED);
+	} else if (cm_node->state == IRDMA_CM_STATE_MPAREQ_SENT) {
+		/* active side: the peer answered our MPA request */
+		bool rejected = mpa_type == IRDMA_MPA_REQUEST_REJECT;
+
+		irdma_cleanup_retrans_entry(cm_node);
+		event = rejected ? IRDMA_CM_EVENT_MPA_REJECT :
+				   IRDMA_CM_EVENT_CONNECTED;
+		cm_node->state = rejected ? IRDMA_CM_STATE_MPAREJ_RCVD :
+					    IRDMA_CM_STATE_OFFLOADED;
+		irdma_send_ack(cm_node);
+	} else {
+		irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+			    "wrong cm_node state =%d\n", cm_node->state);
+	}
+
+	/* in an unexpected state the event type is still EVENT_UNKNOWN */
+	irdma_create_event(cm_node, event);
+}
+
+/**
+ * irdma_check_syn - Check for error on received syn ack
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ *
+ * Returns 0 when the acknowledged sequence number equals the node's
+ * local sequence number. Otherwise raises an active open error on the
+ * node and returns 1.
+ */
+static int irdma_check_syn(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
+{
+	if (ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)
+		return 0;
+
+	irdma_active_open_err(cm_node, true);
+
+	return 1;
+}
+
+/**
+ * irdma_check_seq - check seq numbers if OK
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ *
+ * The packet is accepted when its ACK number equals the node's local
+ * sequence number and its sequence number lies between rcv_nxt and
+ * rcv_nxt + rcv_wnd as judged by between().
+ *
+ * Returns 0 if the packet is acceptable, -1 (after logging) otherwise.
+ */
+static int irdma_check_seq(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
+{
+	u32 win_start = cm_node->tcp_cntxt.rcv_nxt;
+	u32 win_end = win_start + cm_node->tcp_cntxt.rcv_wnd;
+	u32 pkt_seq = ntohl(tcph->seq);
+	u32 pkt_ack = ntohl(tcph->ack_seq);
+
+	if (pkt_ack == cm_node->tcp_cntxt.loc_seq_num &&
+	    between(pkt_seq, win_start, win_end))
+		return 0;
+
+	irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR, "seq number err\n");
+
+	return -1;
+}
+
+/**
+ * irdma_handle_syn_pkt - is for Passive node
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ *
+ * Handles a received SYN according to the node's state:
+ *  - SYN_SENT / MPAREQ_SENT: a SYN on an active open is an error.
+ *  - LISTENING: passive open. The SYN is dropped with a passive open
+ *    error if the listener's pending-accept count exceeds its backlog,
+ *    if the TCP options cannot be handled or if the address handle
+ *    cannot be created. Otherwise the node moves to SYN_RCVD and a
+ *    TCP-established qhash entry is requested for it.
+ *  - CLOSED: a reset is sent.
+ *  - any other state: the SYN is ignored.
+ */
+static void irdma_handle_syn_pkt(struct irdma_cm_node *cm_node,
+				 struct irdma_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	int err;
+	u32 inc_sequence;
+	int optionsize;
+	struct irdma_cm_info nfo;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+	inc_sequence = ntohl(tcph->seq);
+
+	switch (cm_node->state) {
+	case IRDMA_CM_STATE_SYN_SENT:
+	case IRDMA_CM_STATE_MPAREQ_SENT:
+		/* Rcvd syn on active open connection */
+		irdma_active_open_err(cm_node, 1);
+		break;
+	case IRDMA_CM_STATE_LISTENING:
+		/* Passive OPEN */
+		if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+		    cm_node->listener->backlog) {
+			cm_node->cm_core->stats_backlog_drops++;
+			irdma_passive_open_err(cm_node, false);
+			break;
+		}
+		err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
+		if (err) {
+			irdma_passive_open_err(cm_node, false);
+			/* drop pkt */
+			break;
+		}
+		err = cm_node->cm_core->cm_create_ah(cm_node, false);
+		if (err) {
+			irdma_passive_open_err(cm_node, false);
+			/* drop pkt */
+			break;
+		}
+		cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+		cm_node->accept_pend = 1;
+		atomic_inc(&cm_node->listener->pend_accepts_cnt);
+
+		cm_node->state = IRDMA_CM_STATE_SYN_RCVD;
+		irdma_get_addr_info(cm_node, &nfo);
+		nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+		err = irdma_manage_qhash(cm_node->iwdev,
+					 &nfo,
+					 IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+					 IRDMA_QHASH_MANAGE_TYPE_ADD,
+					 (void *)cm_node,
+					 false);
+		if (err) {
+			/*
+			 * The add request was not accepted, so do not record a
+			 * qhash entry: node teardown must not try to delete an
+			 * entry that was never created.
+			 */
+			irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+				    "qhash add failed, err = %d\n", err);
+			break;
+		}
+		cm_node->qhash_set = true;
+		break;
+	case IRDMA_CM_STATE_CLOSED:
+		irdma_cleanup_retrans_entry(cm_node);
+		/* NOTE(review): extra reference presumably consumed on the reset send path - confirm */
+		atomic_inc(&cm_node->ref_count);
+		irdma_send_reset(cm_node);
+		break;
+	case IRDMA_CM_STATE_OFFLOADED:
+	case IRDMA_CM_STATE_ESTABLISHED:
+	case IRDMA_CM_STATE_FIN_WAIT1:
+	case IRDMA_CM_STATE_FIN_WAIT2:
+	case IRDMA_CM_STATE_MPAREQ_RCVD:
+	case IRDMA_CM_STATE_LAST_ACK:
+	case IRDMA_CM_STATE_CLOSING:
+	case IRDMA_CM_STATE_UNKNOWN:
+	default:
+		break;
+	}
+}
+
+/**
+ * irdma_handle_synack_pkt - Process SYN+ACK packet (active side)
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ *
+ * In SYN_SENT the SYN+ACK is validated, ACKed and followed by the MPA
+ * request, moving the node to MPAREQ_SENT. In MPAREQ_RCVD it is a passive
+ * open error. In LISTENING and CLOSED a reset is sent. All other states
+ * ignore the packet.
+ */
+static void irdma_handle_synack_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ int err;
+ u32 inc_sequence;
+ int optionsize;
+
+ /* TCP option bytes = header length (doff is in 32-bit words) minus the fixed header */
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ irdma_cleanup_retrans_entry(cm_node);
+ /* active open */
+ /* irdma_check_syn() itself raises the active open error on mismatch */
+ if (irdma_check_syn(cm_node, tcph)) {
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "check syn fail\n");
+ return;
+ }
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* setup options */
+ err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 0);
+ if (err) {
+ /* only logged: the node stays in SYN_SENT and nothing is sent */
+ irdma_debug(cm_node->dev,
+ IRDMA_DEBUG_CM,
+ "cm_node=%p tcp_options failed\n",
+ cm_node);
+ break;
+ }
+ /* second cleanup call on this path; the first is at the top of this case */
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ irdma_send_ack(cm_node); /* ACK for the syn_ack */
+ err = irdma_send_mpa_request(cm_node);
+ if (err) {
+ /* only logged: the state is not advanced to MPAREQ_SENT */
+ irdma_debug(cm_node->dev,
+ IRDMA_DEBUG_CM,
+ "cm_node=%p irdma_send_mpa_request failed\n",
+ cm_node);
+ break;
+ }
+ cm_node->state = IRDMA_CM_STATE_MPAREQ_SENT;
+ break;
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ irdma_passive_open_err(cm_node, true);
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ /* adopt the peer's ACK number as our sequence number before resetting */
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ irdma_cleanup_retrans_entry(cm_node);
+ /* NOTE(review): unlike the LISTENING case, a reference is taken before the reset - confirm the asymmetry is intended */
+ atomic_inc(&cm_node->ref_count);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_CLOSING:
+ case IRDMA_CM_STATE_UNKNOWN:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_ack_pkt - process packet with ACK
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ *
+ * Returns -EINVAL when irdma_check_seq() rejects the packet, the error
+ * from irdma_handle_tcp_options() when option handling fails in SYN_RCVD,
+ * and 0 otherwise.
+ */
+static int irdma_handle_ack_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ u32 inc_sequence;
+ int ret;
+ int optionsize;
+ u32 datasize = rbuf->datalen;
+
+ /* TCP option bytes = header length (doff is in 32-bit words) minus the fixed header */
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+ if (irdma_check_seq(cm_node, tcph))
+ return -EINVAL;
+
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ /* ACK of our SYN+ACK: the connection becomes ESTABLISHED */
+ irdma_cleanup_retrans_entry(cm_node);
+ ret = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
+ if (ret)
+ return ret;
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ cm_node->state = IRDMA_CM_STATE_ESTABLISHED;
+ /* payload on this ACK is handed to the MPA handler */
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ irdma_cleanup_retrans_entry(cm_node);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* ack_rcvd records a bare ACK (no payload) received in this state */
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ cm_node->ack_rcvd = false;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ } else {
+ cm_node->ack_rcvd = true;
+ }
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ irdma_cleanup_retrans_entry(cm_node);
+ /* NOTE(review): extra reference presumably consumed on the reset send path - confirm */
+ atomic_inc(&cm_node->ref_count);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_CLOSING:
+ /* final ACK of the close: release the cm_id (unless accept is pending) and the node */
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ if (!cm_node->accept_pend)
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_FIN_WAIT2;
+ break;
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ case IRDMA_CM_STATE_UNKNOWN:
+ default:
+ irdma_cleanup_retrans_entry(cm_node);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_process_pkt - process cm packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ *
+ * Dispatches on the TCP flags, in priority order: RST, SYN (with or
+ * without ACK), plain ACK. A FIN flag is handled after a successfully
+ * processed ACK, or on its own when no other flag matched and the
+ * sequence numbers check out.
+ */
+static void irdma_process_pkt(struct irdma_cm_node *cm_node,
+			      struct irdma_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	bool has_fin = tcph->fin;
+
+	if (tcph->rst) {
+		irdma_handle_rst_pkt(cm_node, rbuf);
+		return;
+	}
+
+	if (tcph->syn) {
+		if (tcph->ack)
+			irdma_handle_synack_pkt(cm_node, rbuf);
+		else
+			irdma_handle_syn_pkt(cm_node, rbuf);
+		return;
+	}
+
+	if (tcph->ack) {
+		/* the ACK is always processed; the FIN only if that succeeded */
+		if (!irdma_handle_ack_pkt(cm_node, rbuf) && has_fin)
+			irdma_handle_fin_pkt(cm_node);
+		return;
+	}
+
+	/* none of RST/SYN/ACK set: only a FIN with valid sequence numbers matters */
+	if (has_fin && !irdma_check_seq(cm_node, tcph))
+		irdma_handle_fin_pkt(cm_node);
+}
+
+/**
+ * irdma_make_listen_node - create a listen node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ *
+ * Looks up a listener matching the local address, port and vlan in
+ * @cm_info. An already active match is refused (its looked-up reference
+ * is dropped and NULL is returned). An inactive match is reused; with no
+ * match a new listener is allocated and added to cm_core->listen_nodes.
+ *
+ * Returns the listener, or NULL on a duplicate or allocation failure.
+ */
+static struct irdma_cm_listener *irdma_make_listen_node(struct irdma_cm_core *cm_core,
+							 struct irdma_device *iwdev,
+							 struct irdma_cm_info *cm_info)
+{
+	struct irdma_cm_listener *lsn;
+	unsigned long irq_flags;
+	bool fresh = false;
+
+	lsn = irdma_find_listener(cm_core, cm_info->loc_addr,
+				  cm_info->loc_port,
+				  cm_info->vlan_id,
+				  IRDMA_CM_LISTENER_EITHER_STATE);
+	if (lsn) {
+		/* cannot have multiple matching listeners */
+		if (lsn->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) {
+			atomic_dec(&lsn->ref_count);
+			return NULL;
+		}
+		lsn->reused_node = 1;
+	} else {
+		/* create a CM listen node
+		 * 1/2 node to compare incoming traffic to
+		 */
+		lsn = kzalloc(sizeof(*lsn), GFP_KERNEL);
+		if (!lsn)
+			return NULL;
+
+		fresh = true;
+		cm_core->stats_listen_nodes_created++;
+		memcpy(lsn->loc_addr, cm_info->loc_addr,
+		       sizeof(lsn->loc_addr));
+		lsn->loc_port = cm_info->loc_port;
+		INIT_LIST_HEAD(&lsn->child_listen_list);
+		atomic_set(&lsn->ref_count, 1);
+	}
+
+	/* (re)initialise the per-listen state for both new and reused nodes */
+	lsn->cm_core = cm_core;
+	lsn->iwdev = iwdev;
+	lsn->cm_id = cm_info->cm_id;
+	lsn->ipv4 = cm_info->ipv4;
+	lsn->vlan_id = cm_info->vlan_id;
+	lsn->backlog = cm_info->backlog;
+	atomic_set(&lsn->pend_accepts_cnt, 0);
+	lsn->listener_state = IRDMA_CM_LISTENER_ACTIVE_STATE;
+
+	/* only a newly allocated node is added to the list */
+	if (fresh) {
+		spin_lock_irqsave(&cm_core->listen_list_lock, irq_flags);
+		list_add(&lsn->list, &cm_core->listen_nodes);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, irq_flags);
+	}
+
+	return lsn;
+}
+
+/**
+ * irdma_create_cm_node - make a connection node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @conn_param: connection parameters; supplies ird, ord and the private
+ *		data (pointer and length) copied into the node
+ * @cm_info: quad info for connection
+ * @caller_cm_node: pointer to cm_node structure to return
+ *
+ * Creates a node with no listener, marks it as the client side, records
+ * the requested IRD/ORD and copies the private data into the node's own
+ * buffer.
+ *
+ * Returns 0 on success or -ENOMEM if the node could not be created.
+ */
+static int irdma_create_cm_node(struct irdma_cm_core *cm_core,
+				struct irdma_device *iwdev,
+				struct iw_cm_conn_param *conn_param,
+				struct irdma_cm_info *cm_info,
+				struct irdma_cm_node **caller_cm_node)
+{
+	u16 pd_len = conn_param->private_data_len;
+	struct irdma_cm_node *node;
+
+	/* create a CM connection node */
+	node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL);
+	if (!node)
+		return -ENOMEM;
+
+	/* set our node side to client (active) side */
+	node->tcp_cntxt.client = 1;
+	node->tcp_cntxt.rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+
+	irdma_record_ird_ord(node, conn_param->ird, conn_param->ord);
+
+	/* private data is stored in the node's own buffer */
+	node->pdata.addr = node->pdata_buf;
+	node->pdata.size = pd_len;
+	memcpy(node->pdata_buf, (void *)conn_param->private_data, pd_len);
+
+	*caller_cm_node = node;
+
+	return 0;
+}
+
+/**
+ * irdma_cm_reject - reject and teardown a connection
+ * @cm_node: connection's node
+ * @pdata: ptr to private data for reject
+ * @plen: size of private data
+ *
+ * Does nothing for client (active) nodes. Otherwise sends an MPA reject
+ * carrying @pdata, unless the node's passive state or connection state
+ * shows the connection is already being torn down, in which case the
+ * node reference is dropped instead.
+ *
+ * Returns 0 on success, or the error from irdma_send_mpa_reject() after
+ * attempting to send a reset.
+ */
+static int irdma_cm_reject(struct irdma_cm_node *cm_node,
+ const void *pdata,
+ u8 plen)
+{
+ int ret;
+ int passive_state;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+
+ if (cm_node->tcp_cntxt.client)
+ return 0;
+
+ irdma_cleanup_retrans_entry(cm_node);
+
+ /* NOTE(review): presumably IRDMA_SEND_RESET_EVENT is reached when a reset already bumped passive_state - confirm */
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == IRDMA_SEND_RESET_EVENT) {
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+
+ if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+
+ ret = irdma_send_mpa_reject(cm_node, pdata, plen);
+ if (!ret) {
+ /* reject sent: take a cm_id reference */
+ cm_id->add_ref(cm_id);
+ return 0;
+ }
+
+ /* the reject could not be sent: close the node and reset the peer */
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ if (irdma_send_reset(cm_node))
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+ "send reset failed\n");
+ return ret;
+}
+
+/**
+ * irdma_cm_close - close of cm connection
+ * @cm_node: connection's node
+ *
+ * Depending on the node's state this sends a reset, sends a FIN, or just
+ * drops the node reference.
+ *
+ * Returns -EINVAL for a NULL node, -1 when a close is already in
+ * progress (FIN_WAIT1/2, LAST_ACK, TIME_WAIT, CLOSING), 0 otherwise.
+ */
+static int irdma_cm_close(struct irdma_cm_node *cm_node)
+{
+	if (!cm_node)
+		return -EINVAL;
+
+	switch (cm_node->state) {
+	/* connection not yet offloaded or closed: abort it with a reset */
+	case IRDMA_CM_STATE_SYN_RCVD:
+	case IRDMA_CM_STATE_SYN_SENT:
+	case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED:
+	case IRDMA_CM_STATE_ESTABLISHED:
+	case IRDMA_CM_STATE_ACCEPTING:
+	case IRDMA_CM_STATE_MPAREQ_SENT:
+	case IRDMA_CM_STATE_MPAREQ_RCVD:
+	case IRDMA_CM_STATE_LISTENING:
+		irdma_cleanup_retrans_entry(cm_node);
+		irdma_send_reset(cm_node);
+		break;
+
+	case IRDMA_CM_STATE_CLOSE_WAIT:
+		cm_node->state = IRDMA_CM_STATE_LAST_ACK;
+		irdma_send_fin(cm_node);
+		break;
+
+	/* a close is already under way */
+	case IRDMA_CM_STATE_FIN_WAIT1:
+	case IRDMA_CM_STATE_FIN_WAIT2:
+	case IRDMA_CM_STATE_LAST_ACK:
+	case IRDMA_CM_STATE_TIME_WAIT:
+	case IRDMA_CM_STATE_CLOSING:
+		return -1;
+
+	case IRDMA_CM_STATE_OFFLOADED:
+		if (cm_node->send_entry)
+			irdma_debug(cm_node->dev, IRDMA_DEBUG_ERR,
+				    "CM send_entry in OFFLOADED state\n");
+		/* fall through */
+	case IRDMA_CM_STATE_MPAREJ_RCVD:
+	case IRDMA_CM_STATE_UNKNOWN:
+	case IRDMA_CM_STATE_INITED:
+	case IRDMA_CM_STATE_CLOSED:
+	case IRDMA_CM_STATE_LISTENER_DESTROYED:
+		/* nothing to send: just drop the node reference */
+		irdma_rem_ref_cm_node(cm_node);
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * irdma_receive_ilq - recv an ETHERNET packet, and process it
+ * through CM
+ * @vsi: VSI structure; its back_vsi pointer is used as the irdma device
+ * @rbuf: receive buffer
+ *
+ * Builds the connection quad (addresses, ports, vlan, priority, tos) from
+ * the packet headers and looks up a matching cm node. If none exists and
+ * the packet is a bare SYN for an active listener, a new passive node is
+ * created. The packet is then run through irdma_process_pkt().
+ */
+void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf)
+{
+ struct irdma_cm_node *cm_node;
+ struct irdma_cm_listener *listener;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ struct irdma_cm_info cm_info = {};
+ struct irdma_device *iwdev = (struct irdma_device *)vsi->back_vsi;
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ struct vlan_ethhdr *ethh;
+ u16 vtag;
+
+ /* if vlan, then maclen = 18 else 14 */
+ iph = (struct iphdr *)rbuf->iph;
+ irdma_debug_buf(vsi->dev,
+ IRDMA_DEBUG_ILQ,
+ "RECEIVE ILQ BUFFER",
+ rbuf->mem.va,
+ rbuf->totallen);
+ /* after GEN_1 the vlan tag comes from rbuf; on GEN_1 it is parsed from the frame */
+ if (iwdev->rf->sc_dev.hw_attrs.hw_rev > IRDMA_GEN_1) {
+ if (rbuf->vlan_valid) {
+ vtag = rbuf->vlan_id;
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
+ } else {
+ cm_info.vlan_id = IRDMA_NO_VLAN;
+ }
+ } else {
+ ethh = (struct vlan_ethhdr *)rbuf->mem.va;
+
+ if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
+ vtag = ntohs(ethh->h_vlan_TCI);
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
+ irdma_debug(cm_core->dev,
+ IRDMA_DEBUG_CM,
+ "vlan_id=%d\n",
+ cm_info.vlan_id);
+ } else {
+ cm_info.vlan_id = IRDMA_NO_VLAN;
+ }
+ }
+ tcph = (struct tcphdr *)rbuf->tcph;
+
+ /* local = packet destination, remote = packet source, in host byte order */
+ if (rbuf->ipv4) {
+ cm_info.loc_addr[0] = ntohl(iph->daddr);
+ cm_info.rem_addr[0] = ntohl(iph->saddr);
+ cm_info.ipv4 = true;
+ cm_info.tos = iph->tos;
+ } else {
+ ip6h = (struct ipv6hdr *)rbuf->iph;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(cm_info.rem_addr,
+ ip6h->saddr.in6_u.u6_addr32);
+ cm_info.ipv4 = false;
+ /* rebuild the 8-bit traffic class from the priority and flow label fields */
+ cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
+ }
+ cm_info.loc_port = ntohs(tcph->dest);
+ cm_info.rem_port = ntohs(tcph->source);
+ cm_node = irdma_find_node(cm_core,
+ cm_info.rem_port,
+ cm_info.rem_addr,
+ cm_info.loc_port,
+ cm_info.loc_addr,
+ true, false);
+
+ if (!cm_node) {
+ /* Only type of packet accepted are for the
+ * PASSIVE open (syn only)
+ */
+ if (!tcph->syn || tcph->ack)
+ return;
+
+ listener = irdma_find_listener(cm_core,
+ cm_info.loc_addr,
+ cm_info.loc_port,
+ cm_info.vlan_id,
+ IRDMA_CM_LISTENER_ACTIVE_STATE);
+ if (!listener) {
+ cm_info.cm_id = NULL;
+ irdma_debug(cm_core->dev, IRDMA_DEBUG_CM,
+ "no listener found\n");
+ return;
+ }
+
+ cm_info.cm_id = listener->cm_id;
+ cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info,
+ listener);
+ if (!cm_node) {
+ irdma_debug(cm_core->dev,
+ IRDMA_DEBUG_CM,
+ "allocate node failed\n");
+ /* give back the listener reference taken by the lookup */
+ atomic_dec(&listener->ref_count);
+ return;
+ }
+
+ /* a SYN that also carries RST or FIN is not accepted */
+ if (!tcph->rst && !tcph->fin) {
+ cm_node->state = IRDMA_CM_STATE_LISTENING;
+ } else {
+ irdma_rem_ref_cm_node(cm_node);
+ return;
+ }
+
+ /* extra reference, matched by the irdma_rem_ref_cm_node() at the end */
+ atomic_inc(&cm_node->ref_count);
+ } else if (cm_node->state == IRDMA_CM_STATE_OFFLOADED) {
+ /* offloaded nodes are not processed here; drop the reference and return */
+ irdma_rem_ref_cm_node(cm_node);
+ return;
+ }
+
+ irdma_process_pkt(cm_node, rbuf);
+ irdma_rem_ref_cm_node(cm_node);
+}
+
+/* No-op cm_create_ah callback, installed for IRDMA_GEN_1 in irdma_setup_cm_core(); always returns 0. */
+static int irdma_cm_create_ah_nop(struct irdma_cm_node *cm_node, bool wait)
+{
+ return 0;
+}
+
+/* No-op cm_free_ah callback, installed for IRDMA_GEN_1 in irdma_setup_cm_core(). */
+static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node)
+{
+}
+
+/**
+ * irdma_setup_cm_core - allocate a top level instance of a cm
+ * core
+ * @iwdev: iwarp device structure
+ * @rdma_ver: hardware generation; selects the frame-building and
+ * address-handle callbacks
+ *
+ * Initialises the lists, locks, timer and workqueues of @iwdev->cm_core
+ * and installs the generation-specific callbacks.
+ */
+void irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+
+ cm_core->iwdev = iwdev;
+ cm_core->dev = &iwdev->rf->sc_dev;
+
+ INIT_LIST_HEAD(&cm_core->accelerated_list);
+ INIT_LIST_HEAD(&cm_core->non_accelerated_list);
+ INIT_LIST_HEAD(&cm_core->listen_nodes);
+
+ timer_setup(&cm_core->tcp_timer, irdma_cm_timer_tick, 0);
+
+ spin_lock_init(&cm_core->ht_lock);
+ spin_lock_init(&cm_core->listen_list_lock);
+ spin_lock_init(&cm_core->apbvt_lock);
+
+ /* NOTE(review): neither allocation result is checked and this function cannot report failure */
+ cm_core->event_wq = alloc_ordered_workqueue("iwewq", WQ_MEM_RECLAIM);
+ cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq", WQ_MEM_RECLAIM);
+ switch (rdma_ver) {
+ case IRDMA_GEN_1:
+ /* GEN_1 uses the UDA frame builder and no-op address-handle callbacks */
+ cm_core->form_cm_frame = irdma_form_uda_cm_frame;
+ cm_core->cm_create_ah = irdma_cm_create_ah_nop;
+ cm_core->cm_free_ah = irdma_cm_free_ah_nop;
+ break;
+ case IRDMA_GEN_2:
+ default:
+ /* GEN_2 and any other value use the AH frame builder and real AH callbacks */
+ cm_core->form_cm_frame = irdma_form_ah_cm_frame;
+ cm_core->cm_create_ah = irdma_cm_create_ah;
+ cm_core->cm_free_ah = irdma_cm_free_ah;
+ }
+}
+
+/**
+ * irdma_cleanup_cm_core - deallocate a top level instance of a
+ * cm core
+ * @cm_core: cm's core; a NULL pointer is ignored
+ *
+ * Stops the CM timer and destroys the event and disconnect workqueues.
+ */
+void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
+{
+	if (!cm_core)
+		return;
+
+	/*
+	 * del_timer_sync() waits for a running timer handler to finish, so
+	 * it must not be called while holding a lock the handler may take;
+	 * doing so under ht_lock with interrupts disabled can deadlock.
+	 * It is also safe on a timer that is not pending, so no
+	 * timer_pending() check is needed.
+	 */
+	del_timer_sync(&cm_core->tcp_timer);
+
+	destroy_workqueue(cm_core->event_wq);
+	destroy_workqueue(cm_core->disconn_wq);
+}
+
+/**
+ * irdma_init_tcp_ctx - setup qp context
+ * @cm_node: connection's node
+ * @tcp_info: offload info for tcp
+ * @iwqp: associate qp for the connection
+ *
+ * Fills @tcp_info from the node's TCP context and addressing so that it
+ * describes an ESTABLISHED connection, and resolves the ARP table index
+ * for the remote address.
+ */
+static void irdma_init_tcp_ctx(struct irdma_cm_node *cm_node,
+			       struct irdma_tcp_offload_info *tcp_info,
+			       struct irdma_qp *iwqp)
+{
+	/* fixed policy and default values */
+	tcp_info->drop_ooo_seg = !iwqp->iwdev->rf->ooo;
+	tcp_info->wscale = true;
+	tcp_info->ignore_tcp_opt = true;
+	tcp_info->ignore_tcp_uns_opt = true;
+	tcp_info->no_nagle = false;
+	tcp_info->flow_label = 0;
+	tcp_info->ttl = IRDMA_DEFAULT_TTL;
+	tcp_info->rtt_var = IRDMA_DEFAULT_RTT_VAR;
+	tcp_info->ss_thresh = IRDMA_DEFAULT_SS_THRESH;
+	tcp_info->rexmit_thresh = IRDMA_DEFAULT_REXMIT_THRESH;
+	tcp_info->tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+
+	/* window scaling and receive side, taken from the node */
+	tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
+	tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
+	tcp_info->rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+	tcp_info->snd_wl1 = cm_node->tcp_cntxt.rcv_nxt;
+	tcp_info->rcv_wnd = cm_node->tcp_cntxt.rcv_wnd <<
+			    cm_node->tcp_cntxt.rcv_wscale;
+
+	/* every send-side sequence field starts at the local sequence number */
+	tcp_info->snd_nxt = cm_node->tcp_cntxt.loc_seq_num;
+	tcp_info->snd_max = cm_node->tcp_cntxt.loc_seq_num;
+	tcp_info->snd_una = cm_node->tcp_cntxt.loc_seq_num;
+	tcp_info->snd_wl2 = cm_node->tcp_cntxt.loc_seq_num;
+	tcp_info->snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+	tcp_info->max_snd_window = cm_node->tcp_cntxt.max_snd_wnd;
+	tcp_info->snd_mss = (u32)cm_node->tcp_cntxt.mss;
+	tcp_info->cwnd = 2 * cm_node->tcp_cntxt.mss;
+
+	if (cm_node->vlan_id <= VLAN_VID_MASK) {
+		tcp_info->insert_vlan_tag = true;
+		tcp_info->vlan_tag = cm_node->vlan_id;
+	}
+
+	/* addressing: ports are common, addresses depend on the IP version */
+	tcp_info->ipv4 = cm_node->ipv4;
+	tcp_info->src_port = cm_node->loc_port;
+	tcp_info->dst_port = cm_node->rem_port;
+
+	if (!cm_node->ipv4) {
+		tcp_info->dest_ip_addr0 = cm_node->rem_addr[0];
+		tcp_info->dest_ip_addr1 = cm_node->rem_addr[1];
+		tcp_info->dest_ip_addr2 = cm_node->rem_addr[2];
+		tcp_info->dest_ip_addr3 = cm_node->rem_addr[3];
+		tcp_info->local_ipaddr0 = cm_node->loc_addr[0];
+		tcp_info->local_ipaddr1 = cm_node->loc_addr[1];
+		tcp_info->local_ipaddr2 = cm_node->loc_addr[2];
+		tcp_info->local_ipaddr3 = cm_node->loc_addr[3];
+		tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf,
+							 &tcp_info->dest_ip_addr0,
+							 false,
+							 NULL,
+							 IRDMA_ARP_RESOLVE);
+		return;
+	}
+
+	/* IPv4 uses only the last address word */
+	tcp_info->dest_ip_addr3 = cm_node->rem_addr[0];
+	tcp_info->local_ipaddr3 = cm_node->loc_addr[0];
+	tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf,
+						 &tcp_info->dest_ip_addr3,
+						 true,
+						 NULL,
+						 IRDMA_ARP_RESOLVE);
+}
+
+/**
+ * irdma_cm_init_tsa_conn - setup qp for RTS
+ * @iwqp: associate qp for the connection
+ * @cm_node: connection's node
+ *
+ * Fills the QP's iWARP and TCP offload info from @cm_node, marks the
+ * node OFFLOADED and writes the QP host context through the device's
+ * qp_setctx op.
+ */
+static void irdma_cm_init_tsa_conn(struct irdma_qp *iwqp,
+ struct irdma_cm_node *cm_node)
+{
+ struct irdma_tcp_offload_info tcp_info = {};
+ struct irdma_iwarp_offload_info *iwarp_info;
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_sc_dev *dev = &iwqp->iwdev->rf->sc_dev;
+
+ iwarp_info = &iwqp->iwarp_info;
+ ctx_info = &iwqp->ctx_info;
+
+ /* tcp_info lives on this function's stack; ctx_info only borrows it for qp_setctx below */
+ ctx_info->tcp_info = &tcp_info;
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+ iwarp_info->ord_size = cm_node->ord_size;
+ iwarp_info->ird_size = irdma_derive_hw_ird_setting(cm_node->ird_size);
+ iwarp_info->rd_en = true;
+ iwarp_info->rdmap_ver = 1;
+ iwarp_info->ddp_ver = 1;
+ iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
+
+ ctx_info->tcp_info_valid = true;
+ ctx_info->iwarp_info_valid = true;
+ ctx_info->add_to_qoslist = true;
+ ctx_info->user_pri = cm_node->user_pri;
+
+ irdma_init_tcp_ctx(cm_node, &tcp_info, iwqp);
+ if (cm_node->snd_mark_en) {
+ iwarp_info->snd_mark_en = true;
+ iwarp_info->snd_mark_offset =
+ (tcp_info.snd_nxt & SNDMARKER_SEQNMASK) + cm_node->lsmm_size;
+ }
+
+ cm_node->state = IRDMA_CM_STATE_OFFLOADED;
+ /* irdma_init_tcp_ctx() above already set this same value */
+ tcp_info.tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+ tcp_info.src_mac_addr_idx = iwqp->iwdev->mac_ip_table_idx;
+
+ dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+ iwqp->host_ctx.va, ctx_info);
+
+ /* once tcp_info is set, no need to do it again */
+ /* NOTE(review): ctx_info->tcp_info still points at the stack variable after return; only the valid flags are cleared */
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = false;
+ ctx_info->add_to_qoslist = false;
+}
+
+/**
+ * irdma_cm_disconn - when a connection is being closed
+ * @iwqp: associated qp for the connection
+ *
+ * Queues a disconnect work item for @iwqp on the disconnect workqueue.
+ * A QP reference is taken for the work item, under qptable_lock, only if
+ * the QP is still present in the QP table. Nothing is queued when the
+ * work item cannot be allocated or the QP has already been freed.
+ */
+void irdma_cm_disconn(struct irdma_qp *iwqp)
+{
+	struct irdma_device *iwdev = iwqp->iwdev;
+	struct irdma_cm_core *cm_core = &iwdev->cm_core;
+	struct disconn_work *dwork;
+	unsigned long irq_flags;
+	bool qp_present;
+
+	dwork = kzalloc(sizeof(*dwork), GFP_ATOMIC);
+	if (!dwork)
+		return;
+
+	/* check the table and take the reference atomically w.r.t. QP removal */
+	spin_lock_irqsave(&iwdev->rf->qptable_lock, irq_flags);
+	qp_present = iwdev->rf->qp_table[iwqp->ibqp.qp_num] != NULL;
+	if (qp_present)
+		irdma_add_ref(&iwqp->ibqp);
+	spin_unlock_irqrestore(&iwdev->rf->qptable_lock, irq_flags);
+
+	if (!qp_present) {
+		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
+			    "qp_id %d is already freed\n",
+			    iwqp->ibqp.qp_num);
+		kfree(dwork);
+		return;
+	}
+
+	dwork->iwqp = iwqp;
+	INIT_WORK(&dwork->work, irdma_disconnect_worker);
+	queue_work(cm_core->disconn_wq, &dwork->work);
+}
+
+/**
+ * irdma_qp_disconnect - release lsmm resources and close the cm node
+ * @iwqp: qp associated with the connection
+ *
+ * A qp flagged as an active connection only has that flag cleared.
+ * Otherwise, when the ietf_mem buffer is allocated, the lsmm memory
+ * region (if registered) is deregistered and the buffer is freed.
+ * In both cases the cm node, when one is still attached to the qp,
+ * is then closed through irdma_cm_close().
+ * Nothing is done if the qp has no device pointer.
+ */
+static void irdma_qp_disconnect(struct irdma_qp *iwqp)
+{
+	struct irdma_device *iwdev = iwqp->iwdev;
+
+	if (!iwdev) {
+		irdma_pr_err("iwdev == NULL\n");
+		return;
+	}
+
+	if (iwqp->active_conn) {
+		/* indicate this connection is NOT active */
+		iwqp->active_conn = 0;
+	} else if (iwqp->ietf_mem.va) {
+		/* Need to free the Last Streaming Mode Message */
+		if (iwqp->lsmm_mr)
+			iwdev->iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr);
+		irdma_free_dma_mem(iwdev->rf->sc_dev.hw,
+				   &iwqp->ietf_mem);
+	}
+
+	/* close the CM node down if it is still active */
+	if (iwqp->cm_node) {
+		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
+			    "Call close API\n");
+		irdma_cm_close(iwqp->cm_node);
+	}
+}
+
+/**
+ * irdma_cm_disconn_true - called by worker thread to disconnect qp
+ * @iwqp: associated qp for the connection
+ *
+ * On a RoCE device the qp is moved to IB_QPS_ERR and IB_EVENT_QP_FATAL is
+ * reported to the qp event handler, unless a flush was already issued or
+ * the qp is destroyed.
+ *
+ * Otherwise, with iwqp->lock held, the hw tcp state, ib qp state and last
+ * AE are sampled to decide whether to flush the queues, send
+ * IW_CM_EVENT_DISCONNECT and/or close the connection (IW_CM_EVENT_CLOSE).
+ * iwqp->cm_id is cleared under the lock when a close is decided; the
+ * events themselves are sent after the lock is dropped, and
+ * cm_id->rem_ref() is called once the close event has been sent.
+ */
+static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
+{
+ struct iw_cm_id *cm_id;
+ struct irdma_device *iwdev;
+ struct irdma_sc_qp *qp = &iwqp->sc_qp; /* NOTE(review): formed before the !iwqp check below */
+ u16 last_ae;
+ u8 original_hw_tcp_state;
+ u8 original_ibqp_state;
+ int disconn_status = 0;
+ int issue_disconn = 0;
+ int issue_close = 0;
+ int issue_flush = 0;
+ struct ib_event ibevent;
+ unsigned long flags;
+ int err;
+
+ if (!iwqp) {
+ irdma_pr_err("iwqp == NULL\n");
+ return;
+ }
+
+ iwdev = iwqp->iwdev;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (rdma_protocol_roce(&iwdev->iwibdev->ibdev, 1)) {
+ struct ib_qp_attr attr; /* only qp_state is filled in before use */
+
+ if (iwqp->flush_issued || iwqp->destroyed) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ return;
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ attr.qp_state = IB_QPS_ERR;
+ irdma_modify_qp_roce(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (iwqp->ibqp.event_handler) {
+ ibevent.device = iwqp->ibqp.device;
+ ibevent.event = IB_EVENT_QP_FATAL;
+ ibevent.element.qp = &iwqp->ibqp;
+ iwqp->ibqp.event_handler(&ibevent,
+ iwqp->ibqp.qp_context);
+ }
+ return;
+ }
+
+ cm_id = iwqp->cm_id;
+ /* make sure we haven't already closed this connection */
+ if (!cm_id) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ return;
+ }
+
+ original_hw_tcp_state = iwqp->hw_tcp_state;
+ original_ibqp_state = iwqp->ibqp_state;
+ last_ae = iwqp->last_aeq;
+
+ if (qp->term_flags) { /* term_flags set: disconnect, close and flush */
+ issue_disconn = 1;
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ irdma_terminate_del_timer(qp);
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ } else if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT) ||
+ ((original_ibqp_state == IB_QPS_RTS) &&
+ (last_ae == IRDMA_AE_LLP_CONNECTION_RESET))) {
+ issue_disconn = 1;
+ if (last_ae == IRDMA_AE_LLP_CONNECTION_RESET)
+ disconn_status = -ECONNRESET;
+ }
+
+ if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+ original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+ last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+ last_ae == IRDMA_AE_LLP_CONNECTION_RESET ||
+ iwdev->reset)) {
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ qp->term_flags = 0; /* NOTE(review): this also suppresses the qp event after the flush below */
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (issue_flush && !iwqp->destroyed) {
+ /* Flush the queues */
+ irdma_flush_wqes(iwdev->rf, iwqp);
+
+ if (qp->term_flags && iwqp->ibqp.event_handler) {
+ ibevent.device = iwqp->ibqp.device;
+ ibevent.event =
+ (qp->eventtype == TERM_EVENT_QP_FATAL) ?
+ IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
+ ibevent.element.qp = &iwqp->ibqp;
+ iwqp->ibqp.event_handler(&ibevent,
+ iwqp->ibqp.qp_context);
+ }
+ }
+
+ if (!cm_id || !cm_id->event_handler)
+ return;
+ if (issue_disconn) {
+ err = irdma_send_cm_event(NULL,
+ cm_id,
+ IW_CM_EVENT_DISCONNECT,
+ disconn_status);
+ if (err)
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_CM,
+ "disconnect event failed: - cm_id = %p\n",
+ cm_id);
+ }
+ if (issue_close) {
+ irdma_qp_disconnect(iwqp);
+ cm_id->provider_data = iwqp;
+ err = irdma_send_cm_event(NULL,
+ cm_id,
+ IW_CM_EVENT_CLOSE,
+ 0);
+ if (err)
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_CM,
+ "close event failed: - cm_id = %p\n",
+ cm_id);
+ cm_id->rem_ref(cm_id); /* presumably pairs with cm_id->add_ref() in accept/connect - confirm */
+ }
+}
+
+/**
+ * irdma_disconnect_worker - worker for connection close
+ * @work: work_struct embedded in the queued disconn_work item
+ */
+static void irdma_disconnect_worker(struct work_struct *work)
+{
+	struct disconn_work *item;
+	struct irdma_qp *qp;
+
+	item = container_of(work, struct disconn_work, work);
+	qp = item->iwqp;
+
+	/* the work item is not needed once the qp pointer is read out */
+	kfree(item);
+
+	irdma_cm_disconn_true(qp);
+	/* drop the qp reference taken in irdma_cm_disconn() */
+	irdma_rem_ref(&qp->ibqp);
+}
+
+/**
+ * irdma_accept - registered call for connection to be accepted
+ * @cm_id: cm information for passive connection
+ * @conn_param: accept parameters
+ *
+ * Builds the MPA reply frame followed by the caller's private data in a
+ * DMA buffer, registers that buffer as a memory region and posts it on the
+ * qp through qp_send_lsmm(). It then sets up the qp context for the
+ * connection (irdma_cm_init_tsa_conn()), moves the qp to IB_QPS_RTS, moves
+ * the cm node to the accelerated list and reports IW_CM_EVENT_ESTABLISHED.
+ *
+ * Return: 0 on success; -EINVAL if the qp is not found or the node is in
+ * IRDMA_CM_STATE_LISTENER_DESTROYED; -ECONNRESET if passive_state reaches
+ * IRDMA_SEND_RESET_EVENT; -ENOMEM if ws_add, the DMA allocation or the
+ * memory registration fails.
+ */
+int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct irdma_cm_core *cm_core;
+ struct irdma_cm_node *cm_node;
+ struct ib_qp_attr attr = {};
+ int passive_state;
+ struct ib_mr *ibmr;
+ struct irdma_pd *iwpd;
+ u16 buf_len = 0;
+ struct irdma_kmem_info accept;
+ enum irdma_status_code status;
+ unsigned long flags;
+ u64 tagged_offset;
+ int wait_ret = 0;
+
+ ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+
+ iwqp = to_iwqp(ibqp);
+ iwdev = iwqp->iwdev;
+ dev = &iwdev->rf->sc_dev;
+ cm_core = &iwdev->cm_core;
+ cm_node = (struct irdma_cm_node *)cm_id->provider_data;
+
+ if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
+ cm_node->ipv4 = true;
+ cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr);
+ } else {
+ cm_node->ipv4 = false;
+ irdma_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id,
+ NULL);
+ }
+ irdma_debug(cm_node->dev,
+ IRDMA_DEBUG_CM,
+ "Accept vlan_id=%d\n",
+ cm_node->vlan_id);
+
+ trace_irdma_accept(cm_node, 0, NULL);
+
+ if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
+ irdma_rem_ref_cm_node(cm_node);
+ return -EINVAL;
+ }
+
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == IRDMA_SEND_RESET_EVENT) {
+ irdma_rem_ref_cm_node(cm_node);
+ return -ECONNRESET;
+ }
+
+ cm_node->cm_core->stats_accepts++;
+ iwqp->cm_node = (void *)cm_node;
+ cm_node->iwqp = iwqp;
+
+ if (cm_node->dev->ws_add(iwqp->sc_qp.vsi, cm_node->user_pri))
+ return -ENOMEM; /* NOTE(review): unlike the returns above, cm_node is not released here - confirm */
+
+ buf_len = conn_param->private_data_len + IRDMA_MAX_IETF_SIZE;
+
+ status = irdma_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
+ if (status)
+ return -ENOMEM; /* NOTE(review): no cm_node release on this path either - confirm */
+
+ cm_node->pdata.size = conn_param->private_data_len;
+ accept.addr = iwqp->ietf_mem.va;
+ accept.size = irdma_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
+ memcpy(accept.addr + accept.size, conn_param->private_data,
+ conn_param->private_data_len);
+
+ /* setup our first outgoing iWarp send WQE (the IETF frame response) */
+ iwpd = iwqp->iwpd;
+ tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
+ ibmr = irdma_reg_phys_mr(&iwpd->ibpd,
+ iwqp->ietf_mem.pa,
+ buf_len,
+ IB_ACCESS_LOCAL_WRITE,
+ &tagged_offset);
+ if (IS_ERR(ibmr)) {
+ irdma_free_dma_mem(dev->hw, &iwqp->ietf_mem);
+ return -ENOMEM; /* NOTE(review): DMA buffer is freed, cm_node is not released - confirm */
+ }
+
+ ibmr->pd = &iwpd->ibpd;
+ ibmr->device = iwpd->ibpd.device;
+ iwqp->lsmm_mr = ibmr;
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+ dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp,
+ iwqp->ietf_mem.va,
+ (accept.size + conn_param->private_data_len),
+ ibmr->lkey);
+
+ if (iwqp->page)
+ kunmap(iwqp->page);
+
+ iwqp->cm_id = cm_id;
+ cm_node->cm_id = cm_id;
+
+ cm_id->provider_data = (void *)iwqp; /* provider_data now refers to the qp instead of the cm node */
+ iwqp->active_conn = 0;
+
+ cm_node->lsmm_size = accept.size + conn_param->private_data_len;
+
+ irdma_cm_init_tsa_conn(iwqp, cm_node);
+ cm_id->add_ref(cm_id);
+ irdma_add_ref(&iwqp->ibqp);
+
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ cm_node->cm_core->cm_free_ah(cm_node);
+
+ irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_move_tail(&cm_node->list, &cm_core->accelerated_list);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ if (dev->hw_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
+ wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
+ iwqp->rts_ae_rcvd,
+ IRDMA_MAX_TIMEOUT);
+ if (!wait_ret) /* 0 means the timeout elapsed; a negative (interrupted) result is not reported */
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+ "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
+ cm_node,
+ cm_node->loc_port,
+ cm_node->rem_port,
+ cm_node->cm_id);
+ }
+
+ cm_node->accelerated = true;
+ irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
+
+ if (cm_node->accept_pend) {
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
+ }
+
+ irdma_debug(cm_node->dev,
+ IRDMA_DEBUG_CM,
+ "rem_port=0x%04x, loc_port=0x%04x\n",
+ cm_node->rem_port,
+ cm_node->loc_port);
+
+ return 0;
+}
+
+/**
+ * irdma_reject - registered call for connection to be rejected
+ * @cm_id: cm information for passive connection
+ * @pdata: private data to be sent
+ * @pdata_len: private data length
+ *
+ * Return: -EINVAL if the device cannot be resolved or the private data
+ * does not fit in the cm buffer, otherwise the irdma_cm_reject() result.
+ */
+int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct irdma_cm_node *cm_node = cm_id->provider_data;
+
+	cm_node->cm_id = cm_id;
+	cm_node->pdata.size = pdata_len;
+
+	trace_irdma_reject(cm_node, 0, NULL);
+
+	if (!to_iwdev(cm_id->device))
+		return -EINVAL;
+
+	/* counted before the length check, so oversized rejects count too */
+	cm_node->cm_core->stats_rejects++;
+
+	if (pdata_len + sizeof(struct ietf_mpa_v2) > IRDMA_MAX_CM_BUF)
+		return -EINVAL;
+
+	return irdma_cm_reject(cm_node, pdata, pdata_len);
+}
+
+/**
+ * irdma_connect - registered call for connection to be established
+ * @cm_id: cm information for the active connection
+ * @conn_param: Information about the connection
+ *
+ * Fills a cm_info from the cm_id addresses, creates the cm node, then calls
+ * cm_create_ah(), adds the TCP-established qhash entry and the apbvt entry
+ * for the local port, links the node and the qp to each other and sends
+ * the SYN unless the node is already in IRDMA_CM_STATE_OFFLOADED.
+ *
+ * Return: 0 on success, non-zero on failure (-EINVAL / -ENOMEM for the
+ * checks made here, otherwise the value returned by the failing helper).
+ */
+int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_cm_node *cm_node;
+ struct irdma_cm_info cm_info;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ int ret = 0;
+
+ ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+ iwqp = to_iwqp(ibqp);
+ if (!iwqp)
+ return -EINVAL;
+ iwdev = iwqp->iwdev;
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+ if (!(laddr->sin_port) || !(raddr->sin_port))
+ return -EINVAL;
+
+ iwqp->active_conn = 1; /* NOTE(review): set before the validation returns below, which do not undo it */
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = iwqp;
+
+ /* set up the connection params for the node */
+ if (cm_id->remote_addr.ss_family == AF_INET) {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
+ return -EINVAL;
+
+ cm_info.ipv4 = true;
+ memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
+ memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+ cm_info.rem_port = ntohs(raddr->sin_port);
+ cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
+ } else {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
+ return -EINVAL;
+
+ cm_info.ipv4 = false;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(cm_info.rem_addr,
+ raddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ cm_info.rem_port = ntohs(raddr6->sin6_port);
+ irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
+ }
+ cm_info.cm_id = cm_id;
+ cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
+ cm_info.tos = cm_id->tos;
+ cm_info.user_pri = rt_tos2priority(cm_id->tos);
+
+ if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri))
+ return -ENOMEM;
+
+ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB, "DCB: TOS:[%d] UP:[%d]\n",
+ cm_id->tos, cm_info.user_pri);
+
+ trace_irdma_dcb_tos(iwdev, cm_id->tos, cm_info.user_pri);
+
+ cm_id->add_ref(cm_id); /* released again on every failure path below */
+ ret = irdma_create_cm_node(&iwdev->cm_core, iwdev,
+ conn_param, &cm_info, &cm_node);
+ if (ret) {
+ cm_id->rem_ref(cm_id);
+ return ret;
+ }
+ ret = cm_node->cm_core->cm_create_ah(cm_node, true);
+ if (ret)
+ goto err;
+ if (irdma_manage_qhash(iwdev,
+ &cm_info,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL,
+ true)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ cm_node->qhash_set = true;
+
+ if (irdma_manage_apbvt(iwdev, cm_info.loc_port,
+ IRDMA_MANAGE_APBVT_ADD)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ cm_node->apbvt_set = true;
+ iwqp->cm_node = cm_node;
+ cm_node->iwqp = iwqp;
+ iwqp->cm_id = cm_id;
+ irdma_add_ref(&iwqp->ibqp); /* NOTE(review): not dropped explicitly at err: - confirm irdma_rem_ref_cm_node() covers it */
+
+ if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
+ cm_node->state = IRDMA_CM_STATE_SYN_SENT;
+ ret = irdma_send_syn(cm_node, 0);
+ if (ret)
+ goto err;
+ }
+
+ irdma_debug(cm_node->dev,
+ IRDMA_DEBUG_CM,
+ "rem_port=0x%04x, loc_port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_port,
+ cm_node->loc_port,
+ cm_node,
+ cm_node->cm_id);
+
+ trace_irdma_connect(cm_node, 0, NULL);
+
+ return 0;
+
+err:
+ if (cm_info.ipv4)
+ irdma_debug(&iwdev->rf->sc_dev,
+ IRDMA_DEBUG_CM,
+ "connect() FAILED: dest addr=%pI4",
+ cm_info.rem_addr); /* NOTE(review): rem_addr is host order (ntohl above) but printed with plain %pI4 */
+ else
+ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
+ "connect() FAILED: dest addr=%pI6",
+ cm_info.rem_addr);
+ irdma_rem_ref_cm_node(cm_node);
+ cm_id->rem_ref(cm_id);
+ iwdev->cm_core.stats_connect_errs++;
+
+ return ret;
+}
+
+/**
+ * irdma_create_listen - registered call creating listener
+ * @cm_id: cm information for passive connection
+ * @backlog: to max accept pending count
+ *
+ * For a wildcard (any) local address irdma_add_mqh_4()/irdma_add_mqh_6()
+ * is called; for a specific address a TCP SYN qhash entry is added with
+ * irdma_manage_qhash(). Either way irdma_manage_apbvt() then adds the
+ * local port. These steps are skipped when the listen node returned by
+ * irdma_make_listen_node() has reused_node set.
+ *
+ * Return: 0 on success, -ENOMEM if no listen node could be made, -EINVAL
+ * on any other failure.
+ */
+int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ struct irdma_device *iwdev;
+ struct irdma_cm_listener *cm_listen_node;
+ struct irdma_cm_info cm_info = {};
+ enum irdma_status_code err;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in6 *laddr6;
+ bool wildcard = false;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
+
+ if (laddr->sin_family == AF_INET) {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
+ return -EINVAL;
+
+ cm_info.ipv4 = true;
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+
+ if (laddr->sin_addr.s_addr != htonl(INADDR_ANY))
+ cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
+ else
+ wildcard = true;
+
+ } else {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
+ return -EINVAL;
+
+ cm_info.ipv4 = false;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY)
+ irdma_netdev_vlan_ipv6(cm_info.loc_addr,
+ &cm_info.vlan_id,
+ NULL);
+ else
+ wildcard = true;
+ }
+ cm_info.backlog = backlog;
+ cm_info.cm_id = cm_id;
+
+ trace_irdma_create_listen(iwdev, &cm_info);
+
+ cm_listen_node = irdma_make_listen_node(&iwdev->cm_core, iwdev,
+ &cm_info);
+ if (!cm_listen_node) {
+ irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+ "cm_listen_node == NULL\n");
+ return -ENOMEM;
+ }
+
+ cm_id->provider_data = cm_listen_node; /* NOTE(review): left set if the error path below deletes the node */
+
+ cm_listen_node->tos = cm_id->tos;
+ cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = cm_listen_node->user_pri;
+ if (!cm_listen_node->reused_node) {
+ if (wildcard) {
+ if (cm_info.ipv4)
+ err = irdma_add_mqh_4(iwdev,
+ &cm_info,
+ cm_listen_node);
+ else
+ err = irdma_add_mqh_6(iwdev,
+ &cm_info,
+ cm_listen_node);
+ if (err)
+ goto error;
+
+ err = irdma_manage_apbvt(iwdev,
+ cm_info.loc_port,
+ IRDMA_MANAGE_APBVT_ADD);
+
+ if (err)
+ goto error;
+ } else {
+ err = irdma_manage_qhash(iwdev,
+ &cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL,
+ true);
+ if (err)
+ goto error;
+
+ cm_listen_node->qhash_set = true;
+ err = irdma_manage_apbvt(iwdev,
+ cm_info.loc_port,
+ IRDMA_MANAGE_APBVT_ADD);
+ if (err)
+ goto error;
+ }
+ }
+ cm_id->add_ref(cm_id); /* released by cm_id->rem_ref() in irdma_destroy_listen() */
+ cm_listen_node->cm_core->stats_listen_created++;
+ return 0;
+
+error:
+
+ irdma_cm_del_listen(&iwdev->cm_core, (void *)cm_listen_node, false);
+
+ return -EINVAL; /* the specific err value is not propagated */
+}
+
+/**
+ * irdma_destroy_listen - registered call to destroy listener
+ * @cm_id: cm information for passive connection
+ *
+ * Deletes the listen node stored in cm_id->provider_data, if there is
+ * one, and releases the cm_id reference.
+ *
+ * Return: always 0.
+ */
+int irdma_destroy_listen(struct iw_cm_id *cm_id)
+{
+	struct irdma_device *iwdev = to_iwdev(cm_id->device);
+	void *listener = cm_id->provider_data;
+
+	if (!listener)
+		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_ERR,
+			    "cm_id->provider_data was NULL\n");
+	else
+		irdma_cm_del_listen(&iwdev->cm_core, listener, true);
+
+	cm_id->rem_ref(cm_id);
+
+	return 0;
+}
+
+/**
+ * irdma_cm_event_connected - handle connected active node
+ * @event: the info for cm_node of connection
+ *
+ * Sets up the qp context for the connection, posts the RTT through
+ * qp_send_rtt(), moves the qp to IB_QPS_RTS, moves the cm node to the
+ * accelerated list and reports IW_CM_EVENT_CONNECT_REPLY with status 0.
+ * If the qp is already destroyed, the connect reply is sent with
+ * -ETIMEDOUT instead and the cm_id and cm node references are released.
+ */
+static void irdma_cm_event_connected(struct irdma_cm_event *event)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_cm_core *cm_core;
+ struct irdma_cm_node *cm_node;
+ struct irdma_sc_dev *dev;
+ struct ib_qp_attr attr = {};
+ struct iw_cm_id *cm_id;
+ unsigned long flags;
+ int status;
+ bool read0;
+ int wait_ret = 0;
+
+ cm_node = event->cm_node;
+ cm_id = cm_node->cm_id;
+ iwqp = (struct irdma_qp *)cm_id->provider_data;
+ iwdev = iwqp->iwdev;
+ dev = &iwdev->rf->sc_dev;
+ cm_core = &iwdev->cm_core;
+
+ if (iwqp->destroyed) {
+ status = -ETIMEDOUT;
+ goto error;
+ }
+
+ irdma_cm_init_tsa_conn(iwqp, cm_node);
+ read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+ dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
+ if (iwqp->page)
+ kunmap(iwqp->page);
+
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_move_tail(&cm_node->list, &cm_core->accelerated_list);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ if (dev->hw_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
+ wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
+ iwqp->rts_ae_rcvd,
+ IRDMA_MAX_TIMEOUT);
+ if (!wait_ret) /* 0 means the timeout elapsed; a negative (interrupted) result is not reported */
+ irdma_debug(cm_node->dev, IRDMA_DEBUG_CM,
+ "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
+ cm_node,
+ cm_node->loc_port,
+ cm_node->rem_port,
+ cm_node->cm_id);
+ }
+
+ cm_node->accelerated = true;
+ irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
+ cm_node->cm_core->cm_free_ah(cm_node);
+ return;
+
+error:
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ irdma_send_cm_event(event->cm_node,
+ cm_id,
+ IW_CM_EVENT_CONNECT_REPLY,
+ status);
+ cm_id->rem_ref(cm_id); /* presumably pairs with cm_id->add_ref() in irdma_connect() - confirm */
+ irdma_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * irdma_cm_event_reset - handle reset
+ * @event: the info for cm_node of connection
+ *
+ * Does nothing unless the cm node has a cm_id whose provider_data (the
+ * qp) is set. Otherwise the qp is detached from the cm_id and a
+ * disconnect (-ECONNRESET) followed by a close event is reported.
+ */
+static void irdma_cm_event_reset(struct irdma_cm_event *event)
+{
+	struct irdma_cm_node *node = event->cm_node;
+	struct iw_cm_id *id = node->cm_id;
+	struct irdma_qp *qp = id ? id->provider_data : NULL;
+
+	if (!qp)
+		return;
+
+	irdma_debug(node->dev, IRDMA_DEBUG_CM,
+		    "reset event %p - cm_id = %p\n", event->cm_node, id);
+	qp->cm_id = NULL;
+
+	irdma_send_cm_event(node, node->cm_id,
+			    IW_CM_EVENT_DISCONNECT, -ECONNRESET);
+	irdma_send_cm_event(node, node->cm_id,
+			    IW_CM_EVENT_CLOSE, 0);
+}
+
+/**
+ * irdma_cm_event_handler - send event to cm upper layer
+ * @work: work_struct embedded in the queued irdma_cm_event
+ *
+ * Dispatches on the event type, then releases the cm_id and cm node
+ * references taken in irdma_cm_post_event() and frees the event.
+ */
+static void irdma_cm_event_handler(struct work_struct *work)
+{
+	struct irdma_cm_event *ev;
+	struct irdma_cm_node *node;
+
+	ev = container_of(work, struct irdma_cm_event, event_work);
+	if (!ev || !ev->cm_node || !ev->cm_node->cm_core)
+		return;
+
+	node = ev->cm_node;
+	trace_irdma_cm_event_handler(node, ev->type, NULL);
+
+	switch (ev->type) {
+	case IRDMA_CM_EVENT_MPA_REQ:
+		irdma_send_cm_event(node, node->cm_id,
+				    IW_CM_EVENT_CONNECT_REQUEST, 0);
+		break;
+	case IRDMA_CM_EVENT_RESET:
+		irdma_cm_event_reset(ev);
+		break;
+	case IRDMA_CM_EVENT_CONNECTED:
+		/* only meaningful once the node has been offloaded */
+		if (node->cm_id && node->state == IRDMA_CM_STATE_OFFLOADED)
+			irdma_cm_event_connected(ev);
+		break;
+	case IRDMA_CM_EVENT_MPA_REJECT:
+		/* ignored once the node has been offloaded */
+		if (node->cm_id && node->state != IRDMA_CM_STATE_OFFLOADED)
+			irdma_send_cm_event(node, node->cm_id,
+					    IW_CM_EVENT_CONNECT_REPLY,
+					    -ECONNREFUSED);
+		break;
+	case IRDMA_CM_EVENT_ABORTED:
+		/* ignored once the node has been offloaded */
+		if (node->cm_id && node->state != IRDMA_CM_STATE_OFFLOADED)
+			irdma_event_connect_error(ev);
+		break;
+	default:
+		irdma_debug(node->dev, IRDMA_DEBUG_ERR,
+			    "bad event type = %d\n", ev->type);
+		break;
+	}
+
+	ev->cm_info.cm_id->rem_ref(ev->cm_info.cm_id);
+	irdma_rem_ref_cm_node(ev->cm_node);
+	kfree(ev);
+}
+
+/**
+ * irdma_cm_post_event - queue event request for worker thread
+ * @event: cm node's info for up event call
+ *
+ * Takes a reference on the cm node and on the cm_id before queueing;
+ * irdma_cm_event_handler() releases both.
+ */
+static void irdma_cm_post_event(struct irdma_cm_event *event)
+{
+	struct irdma_cm_node *node = event->cm_node;
+	struct iw_cm_id *id;
+
+	atomic_inc(&node->ref_count);
+	id = event->cm_info.cm_id;
+	id->add_ref(id);
+
+	INIT_WORK(&event->event_work, irdma_cm_event_handler);
+	queue_work(node->cm_core->event_wq, &event->event_work);
+}
+
+/**
+ * irdma_qhash_ctrl - enable/disable qhash for list
+ * @iwdev: device pointer
+ * @parent_listen_node: parent listen node
+ * @nfo: cm info node
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ *
+ * Enables or disables the qhash for the node in the child
+ * listen list that matches ipaddr. If no matching IP was found
+ * it will allocate and add a new child listen node to the
+ * parent listen node. The listen_list_lock is assumed to be
+ * held when called.
+ *
+ * A new child node is only allocated when @ifup is true. If the qhash
+ * operation fails, a newly allocated child node is freed and not added
+ * to the list; an existing child node is left unchanged.
+ */
+static void irdma_qhash_ctrl(struct irdma_device *iwdev,
+ struct irdma_cm_listener *parent_listen_node,
+ struct irdma_cm_info *nfo,
+ u32 *ipaddr, bool ipv4, bool ifup)
+{
+ struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
+ struct irdma_cm_listener *child_listen_node;
+ struct list_head *pos, *tpos;
+ enum irdma_status_code err;
+ bool node_allocated = false;
+ enum irdma_quad_hash_manage_type op = ifup ?
+ IRDMA_QHASH_MANAGE_TYPE_ADD :
+ IRDMA_QHASH_MANAGE_TYPE_DELETE;
+
+ list_for_each_safe(pos, tpos, child_listen_list) {
+ child_listen_node = list_entry(pos,
+ struct irdma_cm_listener,
+ child_listen_list);
+ if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
+ goto set_qhash; /* existing child matches ipaddr */
+ }
+
+ /* if not found then add a child listener if interface is going up */
+ if (!ifup)
+ return;
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+ if (!child_listen_node)
+ return;
+
+ node_allocated = true;
+ memcpy(child_listen_node, parent_listen_node,
+ sizeof(*child_listen_node)); /* new child starts as a byte copy of the parent */
+
+ memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
+
+set_qhash:
+ memcpy(nfo->loc_addr,
+ child_listen_node->loc_addr,
+ sizeof(nfo->loc_addr));
+ nfo->vlan_id = child_listen_node->vlan_id;
+ err = irdma_manage_qhash(iwdev, nfo,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ op,
+ NULL,
+ false);
+ if (!err) {
+ child_listen_node->qhash_set = ifup;
+ if (node_allocated)
+ list_add(&child_listen_node->child_listen_list,
+ &parent_listen_node->child_listen_list);
+ } else if (node_allocated) {
+ kfree(child_listen_node);
+ }
+}
+
+/**
+ * irdma_cm_teardown_connections - teardown QPs
+ * @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @nfo: cm info; its vlan_id and ipv4 fields select the connections to match
+ * @disconnect_all: flag indicating disconnect all QPs
+ *
+ * Moves to the error state the QPs of all cm nodes (accelerated and
+ * non-accelerated) whose vlan id and local address match @nfo / @ipaddr,
+ * or of every cm node when @disconnect_all is set. In RoCE mode the RC
+ * QPs on the vsi qos lists are matched and moved to error the same way.
+ */
+void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
+				   struct irdma_cm_info *nfo,
+				   bool disconnect_all)
+{
+	struct irdma_cm_core *cm_core = &iwdev->cm_core;
+	struct list_head *list_core_temp;
+	struct list_head *list_node;
+	struct irdma_cm_node *cm_node;
+	struct list_head teardown_list;
+	struct ib_qp_attr attr = {};
+	struct irdma_sc_vsi *vsi = &iwdev->vsi;
+	struct irdma_sc_qp *sc_qp;
+	struct irdma_qp *qp;
+	int i;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&teardown_list);
+
+	/* collect matching nodes under the lock, holding a reference on each */
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	list_for_each_safe(list_node, list_core_temp,
+			   &cm_core->accelerated_list) {
+		cm_node = container_of(list_node, struct irdma_cm_node, list);
+		if (disconnect_all ||
+		    (nfo->vlan_id == cm_node->vlan_id &&
+		     !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16))) {
+			atomic_inc(&cm_node->ref_count);
+			list_add(&cm_node->teardown_entry, &teardown_list);
+		}
+	}
+	list_for_each_safe(list_node, list_core_temp,
+			   &cm_core->non_accelerated_list) {
+		cm_node = container_of(list_node, struct irdma_cm_node, list);
+		if (disconnect_all ||
+		    (nfo->vlan_id == cm_node->vlan_id &&
+		     !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16))) {
+			atomic_inc(&cm_node->ref_count);
+			list_add(&cm_node->teardown_entry, &teardown_list);
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	/* NOTE(review): assumes every collected cm_node has iwqp set - confirm */
+	list_for_each_safe(list_node, list_core_temp, &teardown_list) {
+		cm_node = container_of(list_node, struct irdma_cm_node,
+				       teardown_entry);
+		attr.qp_state = IB_QPS_ERR;
+		irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+		if (iwdev->reset)
+			irdma_cm_disconn(cm_node->iwqp);
+		irdma_rem_ref_cm_node(cm_node);
+	}
+	if (!iwdev->roce_mode)
+		return;
+
+	INIT_LIST_HEAD(&teardown_list);
+	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+		spin_lock_irqsave(&vsi->qos[i].lock, flags);
+		list_for_each_safe(list_node, list_core_temp, &vsi->qos[i].qplist) {
+			u32 qp_ip[4];
+
+			sc_qp = container_of(list_node, struct irdma_sc_qp, list);
+			if (sc_qp->qp_type != IRDMA_QP_TYPE_ROCE_RC)
+				continue;
+
+			qp = sc_qp->back_qp;
+			if (!disconnect_all) {
+				if (nfo->ipv4)
+					qp_ip[0] = qp->udp_info.local_ipaddr3;
+				else
+					memcpy(qp_ip,
+					       &qp->udp_info.local_ipaddr0,
+					       sizeof(qp_ip));
+			}
+
+			if (disconnect_all ||
+			    (nfo->vlan_id == qp->udp_info.vlan_tag &&
+			     !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) {
+				/*
+				 * IRQs are already disabled by the qos lock
+				 * above. A nested irqsave into the same
+				 * 'flags' would overwrite the state saved by
+				 * the outer lock, so take the plain lock.
+				 */
+				spin_lock(&iwdev->rf->qptable_lock);
+				if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
+					irdma_add_ref(&qp->ibqp);
+					list_add(&qp->teardown_entry, &teardown_list);
+				}
+				spin_unlock(&iwdev->rf->qptable_lock);
+			}
+		}
+		spin_unlock_irqrestore(&vsi->qos[i].lock, flags);
+	}
+
+	list_for_each_safe(list_node, list_core_temp, &teardown_list) {
+		qp = container_of(list_node, struct irdma_qp, teardown_entry);
+		attr.qp_state = IB_QPS_ERR;
+		irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
+		irdma_rem_ref(&qp->ibqp);
+	}
+}
+
+/**
+ * irdma_if_notify - process an ifup or ifdown on an interface
+ * @iwdev: device pointer
+ * @netdev: net device the event is for; supplies the vlan id
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ *
+ * Adds (ifup) or deletes (ifdown) the TCP SYN qhash entries of the
+ * listeners on the same vlan that listen on @ipaddr or on the any-address.
+ * On ifdown, connections using @ipaddr are torn down as well.
+ */
+void irdma_if_notify(struct irdma_device *iwdev,
+		     struct net_device *netdev,
+		     u32 *ipaddr,
+		     bool ipv4,
+		     bool ifup)
+{
+	struct irdma_cm_core *cm_core = &iwdev->cm_core;
+	unsigned long flags;
+	struct irdma_cm_listener *listen_node;
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	/*
+	 * Zero-initialized so that fields not assigned below are not passed
+	 * to irdma_manage_qhash() as stack garbage.
+	 * NOTE(review): qh_qpid stays 0 here, whereas irdma_create_listen()
+	 * sets it from the ilq - confirm which value is wanted.
+	 */
+	struct irdma_cm_info nfo = {};
+	u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
+	enum irdma_status_code ret;
+	enum irdma_quad_hash_manage_type op = ifup ?
+					      IRDMA_QHASH_MANAGE_TYPE_ADD :
+					      IRDMA_QHASH_MANAGE_TYPE_DELETE;
+
+	nfo.vlan_id = vlan_id;
+	nfo.ipv4 = ipv4;
+
+	/* Disable or enable qhash for listeners */
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+		if (vlan_id != listen_node->vlan_id)
+			continue;
+
+		/* skip listeners bound to neither ipaddr nor the any-address */
+		if (memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) &&
+		    memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))
+			continue;
+
+		memcpy(nfo.loc_addr, listen_node->loc_addr, sizeof(nfo.loc_addr));
+		nfo.loc_port = listen_node->loc_port;
+		nfo.user_pri = listen_node->user_pri;
+		if (!list_empty(&listen_node->child_listen_list)) {
+			irdma_qhash_ctrl(iwdev,
+					 listen_node,
+					 &nfo,
+					 ipaddr, ipv4, ifup);
+		} else if (memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16)) {
+			ret = irdma_manage_qhash(iwdev,
+						 &nfo,
+						 IRDMA_QHASH_TYPE_TCP_SYN,
+						 op,
+						 NULL,
+						 false);
+			if (!ret)
+				listen_node->qhash_set = ifup;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+	/* disconnect any connected qp's on ifdown */
+	if (!ifup)
+		irdma_cm_teardown_connections(iwdev, ipaddr, &nfo, false);
+}
diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h
new file mode 100644
index 0000000..94fc824
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/cm.h
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef IRDMA_CM_H
+#define IRDMA_CM_H
+
+/* Operation selectors for managing APBVT entries: delete vs. add. */
+#define IRDMA_MANAGE_APBVT_DEL 0
+#define IRDMA_MANAGE_APBVT_ADD 1
+
+/* NOTE(review): presumably the two possible answers to an MPA request - confirm against users. */
+#define IRDMA_MPA_REQUEST_ACCEPT 1
+#define IRDMA_MPA_REQUEST_REJECT 2
+
+/* IETF MPA -- defines */
+/*
+ * NOTE(review): the two key macros below are spelled "IEFT" (sic), unlike
+ * every other IETF_* name in this header. Each key string is exactly
+ * IETF_MPA_KEY_SIZE (16) characters long, not counting the terminating NUL.
+ */
+#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
+#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VER 1
+#define IETF_MAX_PRIV_DATA_LEN 512
+/* 16 + 1 + 1 + 2: the fixed fields of struct ietf_mpa_v1 (key, flags, rev, priv_data_len) */
+#define IETF_MPA_FRAME_SIZE 20
+/* Two __be16 fields, see struct ietf_rtr_msg */
+#define IETF_RTR_MSG_SIZE 4
+#define IETF_MPA_V2_FLAG 0x10
+#define SNDMARKER_SEQNMASK 0x000001ff
+#define IRDMA_MAX_IETF_SIZE 32
+
+/* IETF RTR MSG Fields */
+/*
+ * The flag values use the top two bits of a 16-bit word; IETF_NO_IRD_ORD
+ * (0x3fff) is all ones in the remaining 14 bits.
+ * NOTE(review): presumably PEER_TO_PEER/FLPDU_ZERO_LEN belong to
+ * ietf_rtr_msg.ctrl_ird and RDMA0_WRITE/RDMA0_READ to ctrl_ord - confirm.
+ */
+#define IETF_PEER_TO_PEER 0x8000
+#define IETF_FLPDU_ZERO_LEN 0x4000
+#define IETF_RDMA0_WRITE 0x8000
+#define IETF_RDMA0_READ 0x4000
+#define IETF_NO_IRD_ORD 0x3fff
+
+/* HW-supported IRD sizes*/
+/* Powers of two from 2 to 128. NOTE(review): presumably the values irdma_derive_hw_ird_setting() maps onto - confirm. */
+#define IRDMA_HW_IRD_SETTING_2 2
+#define IRDMA_HW_IRD_SETTING_4 4
+#define IRDMA_HW_IRD_SETTING_8 8
+#define IRDMA_HW_IRD_SETTING_16 16
+#define IRDMA_HW_IRD_SETTING_32 32
+#define IRDMA_HW_IRD_SETTING_64 64
+#define IRDMA_HW_IRD_SETTING_128 128
+
+/* Number of bits in irdma_cm_core.ports_in_use; 65536 covers every u16 port value */
+#define MAX_PORTS 65536
+
+/* NOTE(review): presumably the values held in irdma_cm_node.passive_state - confirm. */
+#define IRDMA_PASSIVE_STATE_INDICATED 0
+#define IRDMA_DO_NOT_SEND_RESET_EVENT 1
+#define IRDMA_SEND_RESET_EVENT 2
+
+#define MAX_IRDMA_IFS 4
+
+/*
+ * Distinct single-bit values, so they can be OR-ed together.
+ * NOTE(review): presumably the TCP flags requested when a CM frame is built - confirm.
+ */
+#define SET_ACK 1
+#define SET_SYN 2
+#define SET_FIN 4
+#define SET_RST 8
+
+#define TCP_OPTIONS_PADDING 3
+
+#define IRDMA_DEFAULT_RETRYS 64
+#define IRDMA_DEFAULT_RETRANS 8
+#define IRDMA_DEFAULT_TTL 0x40
+#define IRDMA_DEFAULT_RTT_VAR 6
+#define IRDMA_DEFAULT_SS_THRESH 0x3fffffff
+#define IRDMA_DEFAULT_REXMIT_THRESH 8
+
+/*
+ * Values written in terms of HZ are in jiffies.
+ * NOTE(review): IRDMA_SHORT_TIME is a bare 10 - presumably jiffies as well, confirm.
+ */
+#define IRDMA_RETRY_TIMEOUT HZ
+#define IRDMA_SHORT_TIME 10
+#define IRDMA_LONG_TIME (2 * HZ)
+#define IRDMA_MAX_TIMEOUT ((unsigned long)(12 * HZ))
+
+/* Defaults and sizes used by the connection manager */
+#define IRDMA_CM_HASHTABLE_SIZE 1024
+#define IRDMA_CM_TCP_TIMER_INTERVAL 3000
+#define IRDMA_CM_DEFAULT_MTU 1540
+#define IRDMA_CM_DEFAULT_FRAME_CNT 10
+#define IRDMA_CM_THREAD_STACK_SIZE 256
+#define IRDMA_CM_DEFAULT_RCV_WND 64240
+/* 0x3fffc == 0xffff << IRDMA_CM_DEFAULT_RCV_WND_SCALE */
+#define IRDMA_CM_DEFAULT_RCV_WND_SCALED 0x3fffc
+#define IRDMA_CM_DEFAULT_RCV_WND_SCALE 2
+#define IRDMA_CM_DEFAULT_FREE_PKTS 10
+#define IRDMA_CM_FREE_PKT_LO_WATERMARK 2
+#define IRDMA_CM_DEFAULT_MSS 536
+#define IRDMA_CM_DEFAULT_MPA_VER 2
+#define IRDMA_CM_DEFAULT_SEQ 0x159bf75f
+#define IRDMA_CM_DEFAULT_LOCAL_ID 0x3b47
+#define IRDMA_CM_DEFAULT_SEQ2 0x18ed5740
+#define IRDMA_CM_DEFAULT_LOCAL_ID2 0xb807
+/* Room for the largest MPA header (IRDMA_MAX_IETF_SIZE) plus the largest private data */
+#define IRDMA_MAX_CM_BUF (IRDMA_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
+
+/*
+ * Single-bit MPA flags (0x20, 0x40, 0x80); IETF_MPA_V2_FLAG (0x10) is defined
+ * separately as a macro.
+ * NOTE(review): presumably carried in the flags byte of ietf_mpa_v1/ietf_mpa_v2 - confirm.
+ */
+enum ietf_mpa_flags {
+ IETF_MPA_FLAGS_REJECT = 0x20,
+ IETF_MPA_FLAGS_CRC = 0x40,
+ IETF_MPA_FLAGS_MARKERS = 0x80,
+};
+
+/* Kind of CM timer; this is the type argument of irdma_schedule_cm_timer() */
+enum irdma_timer_type {
+ IRDMA_TIMER_TYPE_SEND,
+ IRDMA_TIMER_TYPE_RECV,
+ IRDMA_TIMER_NODE_CLEANUP,
+ IRDMA_TIMER_TYPE_CLOSE,
+};
+
+/*
+ * TCP option numbers. The implicit values 0-5 match the standard TCP option
+ * kinds (end of list, no-op, MSS, window scale, SACK permitted, SACK).
+ * NOTE(review): OPTION_NUM_WRITE0 (0xbc) does not look like a standard kind -
+ * presumably driver-private, confirm.
+ */
+enum option_nums {
+ OPTION_NUM_EOL,
+ OPTION_NUM_NONE,
+ OPTION_NUM_MSS,
+ OPTION_NUM_WINDOW_SCALE,
+ OPTION_NUM_SACK_PERM,
+ OPTION_NUM_SACK,
+ OPTION_NUM_WRITE0 = 0xbc,
+};
+
+/* cm node transition states */
+/* Stored in irdma_cm_node.state; IRDMA_CM_STATE_UNKNOWN is the zero value */
+enum irdma_cm_node_state {
+ IRDMA_CM_STATE_UNKNOWN,
+ IRDMA_CM_STATE_INITED,
+ IRDMA_CM_STATE_LISTENING,
+ IRDMA_CM_STATE_SYN_RCVD,
+ IRDMA_CM_STATE_SYN_SENT,
+ IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED,
+ IRDMA_CM_STATE_ESTABLISHED,
+ IRDMA_CM_STATE_ACCEPTING,
+ IRDMA_CM_STATE_MPAREQ_SENT,
+ IRDMA_CM_STATE_MPAREQ_RCVD,
+ IRDMA_CM_STATE_MPAREJ_RCVD,
+ IRDMA_CM_STATE_OFFLOADED,
+ IRDMA_CM_STATE_FIN_WAIT1,
+ IRDMA_CM_STATE_FIN_WAIT2,
+ IRDMA_CM_STATE_CLOSE_WAIT,
+ IRDMA_CM_STATE_TIME_WAIT,
+ IRDMA_CM_STATE_LAST_ACK,
+ IRDMA_CM_STATE_CLOSING,
+ IRDMA_CM_STATE_LISTENER_DESTROYED,
+ IRDMA_CM_STATE_CLOSED,
+};
+
+/* MPA frame revision; stored in irdma_cm_node.mpa_frame_rev */
+enum mpa_frame_ver {
+ IETF_MPA_V1 = 1,
+ IETF_MPA_V2 = 2,
+};
+
+/* NOTE(review): presumably selects between IEFT_MPA_KEY_REQ and IEFT_MPA_KEY_REP - confirm against users. */
+enum mpa_frame_key {
+ MPA_KEY_REQUEST,
+ MPA_KEY_REPLY,
+};
+
+/* Zero-length RDMA operation selector; stored in irdma_cm_node.send_rdma0_op */
+enum send_rdma0 {
+ SEND_RDMA_READ_ZERO = 1,
+ SEND_RDMA_WRITE_ZERO = 2,
+};
+
+/* NOTE(review): presumably the classification of a TCP segment by its control flags - confirm against users. */
+enum irdma_tcpip_pkt_type {
+ IRDMA_PKT_TYPE_UNKNOWN,
+ IRDMA_PKT_TYPE_SYN,
+ IRDMA_PKT_TYPE_SYNACK,
+ IRDMA_PKT_TYPE_ACK,
+ IRDMA_PKT_TYPE_FIN,
+ IRDMA_PKT_TYPE_RST,
+};
+
+/*
+ * Listener state; stored in irdma_cm_listener.listener_state.
+ * EITHER (3) equals PASSIVE (1) | ACTIVE (2).
+ */
+enum irdma_cm_listener_state {
+ IRDMA_CM_LISTENER_PASSIVE_STATE = 1,
+ IRDMA_CM_LISTENER_ACTIVE_STATE = 2,
+ IRDMA_CM_LISTENER_EITHER_STATE = 3,
+};
+
+/* CM event codes */
+/* Stored in irdma_cm_event.type */
+enum irdma_cm_event_type {
+ IRDMA_CM_EVENT_UNKNOWN,
+ IRDMA_CM_EVENT_ESTABLISHED,
+ IRDMA_CM_EVENT_MPA_REQ,
+ IRDMA_CM_EVENT_MPA_CONNECT,
+ IRDMA_CM_EVENT_MPA_ACCEPT,
+ IRDMA_CM_EVENT_MPA_REJECT,
+ IRDMA_CM_EVENT_MPA_ESTABLISHED,
+ IRDMA_CM_EVENT_CONNECTED,
+ IRDMA_CM_EVENT_RESET,
+ IRDMA_CM_EVENT_ABORTED,
+};
+
+/* Multi-byte fields are big-endian (__be16/__be32) */
+struct irdma_bth { /* Base Transport Header */
+ u8 opcode;
+ u8 flags;
+ __be16 pkey;
+ __be32 qpn;
+ __be32 apsn;
+};
+
+/*
+ * MPA v1 frame header: 20 bytes of fixed fields (IETF_MPA_FRAME_SIZE)
+ * followed by the private data.
+ */
+struct ietf_mpa_v1 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len; /* big-endian */
+ u8 priv_data[0]; /* zero-length trailing array: data follows the header */
+};
+
+/* Two big-endian 16-bit control words, 4 bytes in total (IETF_RTR_MSG_SIZE) */
+struct ietf_rtr_msg {
+ __be16 ctrl_ird;
+ __be16 ctrl_ord;
+};
+
+/*
+ * MPA v2 frame header: the same leading fields as struct ietf_mpa_v1, with a
+ * struct ietf_rtr_msg inserted before the private data.
+ */
+struct ietf_mpa_v2 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len; /* big-endian */
+ struct ietf_rtr_msg rtr_msg;
+ u8 priv_data[0]; /* zero-length trailing array: data follows the header */
+};
+
+/* Leading two bytes shared by option_mss and option_windowscale: number, then length */
+struct option_base {
+ u8 optionnum;
+ u8 len;
+};
+
+/* MSS option: option_base fields followed by a big-endian 16-bit MSS value */
+struct option_mss {
+ u8 optionnum;
+ u8 len;
+ __be16 mss;
+};
+
+/* Window scale option: option_base fields followed by a one-byte shift count */
+struct option_windowscale {
+ u8 optionnum;
+ u8 len;
+ u8 shiftcount;
+};
+
+/*
+ * Overlay of the option layouts declared above, so the same bytes can be
+ * viewed as any of them.
+ */
+union all_known_options {
+ char eol;
+ struct option_base base;
+ struct option_mss mss;
+ struct option_windowscale windowscale;
+};
+
+/*
+ * One CM timer entry. irdma_cm_node keeps pointers to two of these
+ * (send_entry and close_entry).
+ */
+struct irdma_timer_entry {
+ struct list_head list;
+ unsigned long timetosend; /* jiffies */
+ struct irdma_puda_buf *sqbuf;
+ u32 type; /* NOTE(review): presumably an enum irdma_timer_type value - confirm */
+ u32 retrycount;
+ u32 retranscount;
+ u32 context;
+ u32 send_retrans;
+ int close_when_complete;
+};
+
+/* CM context params */
+/* TCP sequence, window and MSS bookkeeping; embedded in irdma_cm_node as tcp_cntxt */
+struct irdma_cm_tcp_context {
+ u8 client;
+ u32 loc_seq_num;
+ u32 loc_ack_num;
+ u32 rem_ack_num;
+ u32 rcv_nxt;
+ u32 loc_id;
+ u32 rem_id;
+ u32 snd_wnd;
+ u32 max_snd_wnd;
+ u32 rcv_wnd;
+ u32 mss;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+};
+
+/*
+ * One listener: its local address/port/VLAN plus reference and
+ * pending-accept counters.
+ */
+struct irdma_cm_listener {
+ struct list_head list; /* iterated from irdma_cm_core.listen_nodes by irdma_if_notify() */
+ struct iw_cm_id *cm_id;
+ struct irdma_cm_core *cm_core;
+ struct irdma_device *iwdev;
+ struct list_head child_listen_list;
+ enum irdma_cm_listener_state listener_state;
+ atomic_t ref_count;
+ atomic_t pend_accepts_cnt;
+ u32 loc_addr[4]; /* irdma_if_notify() compares only the first 4 bytes for IPv4; all zero means wildcard */
+ u32 reused_node;
+ int backlog;
+ u16 loc_port;
+ u16 vlan_id;
+ u8 loc_mac[ETH_ALEN];
+ u8 user_pri;
+ u8 tos;
+ bool qhash_set; /* set to the ifup value by irdma_if_notify() after a successful qhash update */
+ bool ipv4;
+};
+
+/* A buffer described by its address and its size */
+struct irdma_kmem_info {
+ void *addr;
+ u32 size;
+};
+
+/*
+ * One CM node: local and remote address/port/MAC, TCP context, MPA frame,
+ * timer entries and the state of the connection it describes.
+ */
+struct irdma_cm_node {
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct irdma_cm_tcp_context tcp_cntxt;
+ struct irdma_cm_core *cm_core;
+ struct irdma_timer_entry *send_entry;
+ struct irdma_timer_entry *close_entry;
+ struct irdma_cm_listener *listener;
+ struct list_head timer_entry;
+ struct list_head reset_entry;
+ struct list_head teardown_entry;
+ struct irdma_kmem_info pdata;
+ struct irdma_sc_ah *ah;
+ /*
+ * The v1 and v2 MPA frame headers share the same storage.
+ * NOTE(review): presumably mpa_frame_rev tells which view is valid - confirm.
+ */
+ union {
+ struct ietf_mpa_v1 mpa_frame;
+ struct ietf_mpa_v2 mpa_v2_frame;
+ };
+ struct irdma_kmem_info mpa_hdr;
+ struct iw_cm_id *cm_id;
+ struct list_head list;
+ spinlock_t retrans_list_lock; /* protect CM node rexmit updates*/
+ atomic_t passive_state;
+ atomic_t ref_count;
+ enum irdma_cm_node_state state;
+ enum send_rdma0 send_rdma0_op;
+ enum mpa_frame_ver mpa_frame_rev;
+ u32 loc_addr[4], rem_addr[4];
+ u16 loc_port, rem_port;
+ bool accelerated;
+ int apbvt_set;
+ int accept_pend;
+ u16 vlan_id;
+ u16 ird_size;
+ u16 ord_size;
+ u16 mpav2_ird_ord;
+ u16 lsmm_size;
+ u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN]; /* sized for the largest private data */
+ u8 loc_mac[ETH_ALEN];
+ u8 rem_mac[ETH_ALEN];
+ u8 user_pri;
+ u8 tos;
+ bool ack_rcvd;
+ bool qhash_set;
+ bool ipv4;
+ bool snd_mark_en;
+};
+
+/* Used by internal CM APIs to pass CM information*/
+/*
+ * Callers that fill in only some fields (irdma_if_notify() does) leave the
+ * rest at whatever the variable was initialized to.
+ */
+struct irdma_cm_info {
+ struct iw_cm_id *cm_id;
+ u16 loc_port;
+ u16 rem_port;
+ u32 loc_addr[4];
+ u32 rem_addr[4];
+ u32 qh_qpid;
+ u16 vlan_id;
+ int backlog;
+ u8 user_pri;
+ u8 tos;
+ bool ipv4;
+};
+
+/*
+ * A CM event: its type, a by-value copy of the CM info, the work item and the
+ * node it refers to.
+ */
+struct irdma_cm_event {
+ enum irdma_cm_event_type type;
+ struct irdma_cm_info cm_info;
+ struct work_struct event_work; /* NOTE(review): presumably queued on irdma_cm_core.event_wq - confirm */
+ struct irdma_cm_node *cm_node;
+};
+
+/*
+ * Connection manager core state: listener and node lists, timer, workqueues,
+ * locks, port bitmap, statistics counters and a few function hooks. It is
+ * reached as iwdev->cm_core (see irdma_if_notify()).
+ */
+struct irdma_cm_core {
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct list_head listen_nodes; /* list of struct irdma_cm_listener, linked through .list */
+ struct list_head accelerated_list;
+ struct list_head non_accelerated_list;
+ struct timer_list tcp_timer;
+ struct workqueue_struct *event_wq;
+ struct workqueue_struct *disconn_wq;
+ spinlock_t ht_lock; /* protect CM node (active side) list */
+ spinlock_t listen_list_lock; /* protect listener list */
+ spinlock_t apbvt_lock; /*serialize apbvt add/del entries*/
+ unsigned long ports_in_use[BITS_TO_LONGS(MAX_PORTS)]; /* bitmap with MAX_PORTS bits */
+ /* Statistics counters */
+ u64 stats_nodes_created;
+ u64 stats_nodes_destroyed;
+ u64 stats_listen_created;
+ u64 stats_listen_destroyed;
+ u64 stats_listen_nodes_created;
+ u64 stats_listen_nodes_destroyed;
+ u64 stats_lpbs;
+ u64 stats_accepts;
+ u64 stats_rejects;
+ u64 stats_connect_errs;
+ u64 stats_passive_errs;
+ u64 stats_pkt_retrans;
+ u64 stats_backlog_drops;
+ /* Function hooks; the implementations are assigned outside this header */
+ struct irdma_puda_buf * (*form_cm_frame)(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_kmem_info *pdata,
+ u8 flags);
+ int (*cm_create_ah)(struct irdma_cm_node *cm_node, bool wait);
+ void (*cm_free_ah)(struct irdma_cm_node *cm_node);
+};
+
+int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *sqbuf,
+ enum irdma_timer_type type,
+ int send_retrans,
+ int close_when_complete);
+int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int irdma_create_listen(struct iw_cm_id *cm_id, int backlog);
+int irdma_destroy_listen(struct iw_cm_id *cm_id);
+void irdma_cm_teardown_connections(struct irdma_device *iwdev,
+ u32 *ipaddr,
+ struct irdma_cm_info *nfo,
+ bool disconnect_all);
+int irdma_cm_start(struct irdma_device *dev);
+int irdma_cm_stop(struct irdma_device *dev);
+bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr);
+bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr);
+int irdma_arp_table(struct irdma_pci_f *rf,
+ u32 *ip_addr,
+ bool ipv4,
+ u8 *mac_addr,
+ u32 action);
+int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac);
+void irdma_if_notify(struct irdma_device *iwdev,
+ struct net_device *netdev,
+ u32 *ipaddr,
+ bool ipv4,
+ bool ifup);
+bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port);
+void irdma_send_ack(struct irdma_cm_node *cm_node);
+void irdma_lpb_nop(struct irdma_sc_qp *qp);
+u8 irdma_derive_hw_ird_setting(u16 cm_ird);
+#endif /* IRDMA_CM_H */
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH v2 perf,bpf 11/11] perf, bpf: save information about short living bpf programs
From: Song Liu @ 2019-02-15 17:13 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Netdev, linux-kernel, ast@kernel.org, daniel@iogearbox.net,
Kernel Team, peterz@infradead.org, jolsa@kernel.org,
namhyung@kernel.org
In-Reply-To: <20190215144146.GF5784@redhat.com>
> On Feb 15, 2019, at 6:41 AM, Arnaldo Carvalho de Melo <acme@redhat.com> wrote:
>
> Em Thu, Feb 14, 2019 at 04:00:45PM -0800, Song Liu escreveu:
>> To annotate bpf programs in perf, it is necessary to save information in
>> bpf_prog_info and btf. For short living bpf program, it is necessary to
>> save these information before it is unloaded.
>>
>> This patch saves these information in a separate thread. This thread
>> creates its own evlist, that only tracks bpf events. This evlists uses
>> ring buffer with very low watermark for lower latency. When bpf load
>> events are received, this thread tries to gather information via sys_bpf
>> and save it in perf_env.
>>
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> tools/perf/builtin-record.c | 13 ++++
>> tools/perf/builtin-top.c | 12 ++++
>> tools/perf/util/bpf-event.c | 129 ++++++++++++++++++++++++++++++++++++
>> tools/perf/util/bpf-event.h | 22 ++++++
>> tools/perf/util/evlist.c | 20 ++++++
>> tools/perf/util/evlist.h | 2 +
>> 6 files changed, 198 insertions(+)
>>
>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>> index 2355e0a9eda0..46abb44aaaab 100644
>> --- a/tools/perf/builtin-record.c
>> +++ b/tools/perf/builtin-record.c
>> @@ -1106,6 +1106,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>> struct perf_data *data = &rec->data;
>> struct perf_session *session;
>> bool disabled = false, draining = false;
>> + struct bpf_event_poll_args poll_args;
>> + bool bpf_thread_running = false;
>> int fd;
>>
>> atexit(record__sig_exit);
>> @@ -1206,6 +1208,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>> goto out_child;
>> }
>>
>> + if (rec->opts.bpf_event) {
>> + poll_args.env = &session->header.env;
>> + poll_args.target = &rec->opts.target;
>> + poll_args.done = &done;
>> + if (bpf_event__start_polling_thread(&poll_args) == 0)
>> + bpf_thread_running = true;
>> + }
>> +
>> err = record__synthesize(rec, false);
>> if (err < 0)
>> goto out_child;
>> @@ -1456,6 +1466,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>>
>> out_delete_session:
>> perf_session__delete(session);
>> +
>> + if (bpf_thread_running)
>> + bpf_event__stop_polling_thread(&poll_args);
>> return status;
>> }
>>
>> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
>> index 5271d7211b9c..2586ee081967 100644
>> --- a/tools/perf/builtin-top.c
>> +++ b/tools/perf/builtin-top.c
>> @@ -1524,10 +1524,12 @@ int cmd_top(int argc, const char **argv)
>> "number of thread to run event synthesize"),
>> OPT_END()
>> };
>> + struct bpf_event_poll_args poll_args;
>> const char * const top_usage[] = {
>> "perf top [<options>]",
>> NULL
>> };
>> + bool bpf_thread_running = false;
>> int status = hists__init();
>>
>> if (status < 0)
>> @@ -1652,8 +1654,18 @@ int cmd_top(int argc, const char **argv)
>> signal(SIGWINCH, winch_sig);
>> }
>>
>> + if (top.record_opts.bpf_event) {
>> + poll_args.env = &perf_env;
>> + poll_args.target = target;
>> + poll_args.done = &done;
>> + if (bpf_event__start_polling_thread(&poll_args) == 0)
>> + bpf_thread_running = true;
>> + }
>> status = __cmd_top(&top);
>>
>> + if (bpf_thread_running)
>> + bpf_event__stop_polling_thread(&poll_args);
>> +
>> out_delete_evlist:
>> perf_evlist__delete(top.evlist);
>>
>> diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
>> index 4f347d61ed96..0caf137c515b 100644
>> --- a/tools/perf/util/bpf-event.c
>> +++ b/tools/perf/util/bpf-event.c
>> @@ -8,6 +8,7 @@
>> #include "machine.h"
>> #include "env.h"
>> #include "session.h"
>> +#include "evlist.h"
>>
>> #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
>>
>> @@ -316,3 +317,131 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
>> free(event);
>> return err;
>> }
>> +
>> +static void perf_env_add_bpf_info(struct perf_env *env, u32 id)
>> +{
>> + struct bpf_prog_info_linear *info_linear;
>> + struct bpf_prog_info_node *info_node;
>> + struct btf *btf = NULL;
>> + u64 arrays;
>> + u32 btf_id;
>> + int fd;
>> +
>> + fd = bpf_prog_get_fd_by_id(id);
>> + if (fd < 0)
>> + return;
>> +
>> + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
>> + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
>> + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
>> + arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
>> + arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
>> + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
>> + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
>> +
>> + info_linear = bpf_program__get_prog_info_linear(fd, arrays);
>> + if (IS_ERR_OR_NULL(info_linear)) {
>> + pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
>> + goto out;
>> + }
>> +
>> + btf_id = info_linear->info.btf_id;
>> +
>> + info_node = malloc(sizeof(struct bpf_prog_info_node));
>> + if (info_node) {
>> + info_node->info_linear = info_linear;
>> + perf_env__insert_bpf_prog_info(env, info_node);
>> + } else
>> + free(info_linear);
>> +
>> + if (btf_id == 0)
>> + goto out;
>> +
>> + if (btf__get_from_id(btf_id, &btf)) {
>> + pr_debug("%s: failed to get BTF of id %u, aborting\n",
>> + __func__, btf_id);
>> + goto out;
>> + }
>> + perf_fetch_btf(env, btf_id, btf);
>> +
>> +out:
>> + free(btf);
>> + close(fd);
>> +}
>> +
>> +static void *bpf_poll_thread(void *arg)
>> +{
>> + struct bpf_event_poll_args *args = arg;
>> + int i;
>> +
>> + while (!*(args->done)) {
>> + perf_evlist__poll(args->evlist, 1000);
>> +
>> + for (i = 0; i < args->evlist->nr_mmaps; i++) {
>> + struct perf_mmap *map = &args->evlist->mmap[i];
>> + union perf_event *event;
>> +
>> + if (perf_mmap__read_init(map))
>> + continue;
>> + while ((event = perf_mmap__read_event(map)) != NULL) {
>> + pr_debug("processing vip event of type %d\n",
>> + event->header.type);
>> + switch (event->header.type) {
>> + case PERF_RECORD_BPF_EVENT:
>> + if (event->bpf_event.type != PERF_BPF_EVENT_PROG_LOAD)
>> + break;
>> + perf_env_add_bpf_info(args->env, event->bpf_event.id);
>> + break;
>> + default:
>> + break;
>> + }
>> + perf_mmap__consume(map);
>> + }
>> + perf_mmap__read_done(map);
>> + }
>> + }
>> + return NULL;
>> +}
>> +
>> +pthread_t poll_thread;
>> +
>> +int bpf_event__start_polling_thread(struct bpf_event_poll_args *args)
>> +{
>> + struct perf_evsel *counter;
>> +
>> + args->evlist = perf_evlist__new();
>> +
>> + if (args->evlist == NULL)
>> + return -1;
>> +
>> + if (perf_evlist__create_maps(args->evlist, args->target))
> goto out_delete_evlist;
>> +
>> + if (perf_evlist__add_bpf_tracker(args->evlist))
> goto out_delete_evlist;
>> +
>> + evlist__for_each_entry(args->evlist, counter) {
>> + if (perf_evsel__open(counter, args->evlist->cpus,
>> + args->evlist->threads) < 0)
> goto out_delete_evlist;
>> + }
>> +
>> + if (perf_evlist__mmap(args->evlist, UINT_MAX))
> goto out_delete_evlist;
>> +
>> + evlist__for_each_entry(args->evlist, counter) {
>> + if (perf_evsel__enable(counter))
> goto out_delete_evlist;
>> + }
>> +
>> + if (pthread_create(&poll_thread, NULL, bpf_poll_thread, args))
> goto out_delete_evlist;
>> +
>> + return 0;
> out_delete_evlist:
> perf_evlist__delete(args->evlist);
> args->evlist = NULL;
>
> return -1;
>> +}
>> +
>> +void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args)
>> +{
>> + pthread_join(poll_thread, NULL);
>> + perf_evlist__exit(args->evlist);
>> +}
>> diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
>> index c4f0f1395ea5..61914827c1e3 100644
>> --- a/tools/perf/util/bpf-event.h
>> +++ b/tools/perf/util/bpf-event.h
>> @@ -12,12 +12,17 @@
>> #include <bpf/libbpf.h>
>> #include <linux/btf.h>
>> #include <linux/rbtree.h>
>> +#include <pthread.h>
>> +#include <api/fd/array.h>
>> #include "event.h"
>>
>> struct machine;
>> union perf_event;
>> +struct perf_env;
>> struct perf_sample;
>> struct record_opts;
>> +struct evlist;
>> +struct target;
>>
>> struct bpf_prog_info_node {
>> struct bpf_prog_info_linear *info_linear;
>> @@ -31,6 +36,13 @@ struct btf_node {
>> char data[];
>> };
>>
>> +struct bpf_event_poll_args {
>> + struct perf_env *env;
>> + struct perf_evlist *evlist;
>> + struct target *target;
>> + volatile int *done;
>> +};
>> +
>> #ifdef HAVE_LIBBPF_SUPPORT
>> int machine__process_bpf_event(struct machine *machine, union perf_event *event,
>> struct perf_sample *sample);
>> @@ -39,6 +51,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
>> perf_event__handler_t process,
>> struct machine *machine,
>> struct record_opts *opts);
>> +int bpf_event__start_polling_thread(struct bpf_event_poll_args *args);
>> +void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args);
>> #else
>> static inline int machine__process_bpf_event(struct machine *machine __maybe_unused,
>> union perf_event *event __maybe_unused,
>> @@ -54,5 +68,13 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
>> {
>> return 0;
>> }
>> +
>> +static inline int bpf_event__start_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
>> +{
>> + return 0;
>> +}
>> +void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
>> +{
>> +}
>> #endif // HAVE_LIBBPF_SUPPORT
>> #endif
>> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
>> index 8c902276d4b4..612c079579ce 100644
>> --- a/tools/perf/util/evlist.c
>> +++ b/tools/perf/util/evlist.c
>> @@ -271,6 +271,26 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist)
>> return 0;
>> }
>>
>> +int perf_evlist__add_bpf_tracker(struct perf_evlist *evlist)
>> +{
>> + struct perf_event_attr attr = {
>> + .type = PERF_TYPE_SOFTWARE,
>> + .config = PERF_COUNT_SW_DUMMY,
>> + .watermark = 1,
>> + .bpf_event = 1,
>> + .wakeup_watermark = 1,
>> + .size = sizeof(attr), /* to capture ABI version */
>> + };
>> + struct perf_evsel *evsel = perf_evsel__new_idx(&attr,
>> + evlist->nr_entries);
>> +
>> + if (evsel == NULL)
>> + return -ENOMEM;
>> +
>> + perf_evlist__add(evlist, evsel);
>
> You could use:
>
> struct perf_evlist *evlist = perf_evlist__new_dummy();
> if (evlist != NULL) {
> struct perf_evsel *evsel == perf_evlist__first(evlist);
> evsel->attr.bpf_event = evsel->attr.watermark = evsel->attr.wakeup_watermark = 1;
> return 0;
> }
> return -1;
This looks cleaner. Let me fix in next version.
>
> Because in this case all you'll have in this evlist is the bpf tracker,
> right? The add_bpf_tracker would be handy if we would want to have a
> pre-existing evlist with some other events and wanted to add a bpf
> tracker, no?
I think all we need is a side-band evlist instead of the main evlist. Maybe
we should call it side-band evlist, and make it more generic?
Thanks,
Song
>
> - Arnaldo
>
>> + return 0;
>> +}
>> +
>> static int perf_evlist__add_attrs(struct perf_evlist *evlist,
>> struct perf_event_attr *attrs, size_t nr_attrs)
>> {
>> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
>> index 868294491194..a2d22715188e 100644
>> --- a/tools/perf/util/evlist.h
>> +++ b/tools/perf/util/evlist.h
>> @@ -84,6 +84,8 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
>>
>> int perf_evlist__add_dummy(struct perf_evlist *evlist);
>>
>> +int perf_evlist__add_bpf_tracker(struct perf_evlist *evlist);
>> +
>> int perf_evlist__add_newtp(struct perf_evlist *evlist,
>> const char *sys, const char *name, void *handler);
>>
>> --
>> 2.17.1
^ permalink raw reply
* [PATCH net] net: validate untrusted gso packets without csum offload
From: Willem de Bruijn @ 2019-02-15 17:15 UTC (permalink / raw)
To: netdev; +Cc: davem, edumazet, jasowang, maximmi, Willem de Bruijn, syzbot
From: Willem de Bruijn <willemb@google.com>
Syzkaller again found a path to a kernel crash through bad gso input.
By building an excessively large packet to cause an skb field to wrap.
If VIRTIO_NET_HDR_F_NEEDS_CSUM was set this would have been dropped in
skb_partial_csum_set.
GSO packets that do not set checksum offload are suspicious and rare.
Most callers of virtio_net_hdr_to_skb already pass them to
skb_probe_transport_header.
Move that test forward, change it to detect parse failure and drop
packets on failure as those clearly are not one of the legitimate
VIRTIO_NET_HDR_GSO types.
Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.")
Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
This captures a variety of bad gso packets, but to tighten further:
- drop SKB_GSO_DODGY packets with ipip/sit/.. , which cannot be legal.
by ipip_gso_segment wrappers around inet_gso_segment
expands on 121d57af308d ("gso: validate gso_type in GSO handlers")
- limit the number of ipv6 exthdrs allowed from dodgy sources.
not sure where to draw the line. but not at 64K ;)
- validate the network and transport protocol returned in
skb_probe_transport_header against the VIRTIO_NET_HDR_GSO type
- probe all dodgy GSO packets, also those that set checksum offload.
this will have a performance impact, discussed previously in
http://patchwork.ozlabs.org/patch/861874/
but it would have blocked this latest bug as well
All but the last one seem pretty uncontroversial to me. If no one
objects I plan to send those to net-next.
---
include/linux/skbuff.h | 2 +-
include/linux/virtio_net.h | 9 +++++++++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 95d25b010a25..4c1c82a5678c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2434,7 +2434,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
skb_set_transport_header(skb, keys.control.thoff);
- else
+ else if (offset_hint >= 0)
skb_set_transport_header(skb, offset_hint);
}
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index cb462f9ab7dd..71f2394abbf7 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -57,6 +57,15 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (!skb_partial_csum_set(skb, start, off))
return -EINVAL;
+ } else {
+ /* gso packets without NEEDS_CSUM do not set transport_offset.
+ * probe and drop if does not match one of the above types.
+ */
+ if (gso_type) {
+ skb_probe_transport_header(skb, -1);
+ if (!skb_transport_header_was_set(skb))
+ return -EINVAL;
+ }
}
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
--
2.21.0.rc0.258.g878e2cd30e-goog
^ permalink raw reply related
* [PATCH net-next] net/ipv6: prefer rcu_access_pointer() over rcu_dereference()
From: Paolo Abeni @ 2019-02-15 17:15 UTC (permalink / raw)
To: netdev; +Cc: David S. Miller, David Ahern
rt6_cache_allowed_for_pmtu() checks for rt->from presence, but
it does not access the RCU protected pointer. We can use
rcu_access_pointer() and clean-up the code a bit. No functional
changes intended.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
net/ipv6/route.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc066fdf7e46..87a0561136dd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2277,14 +2277,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
- bool from_set;
-
- rcu_read_lock();
- from_set = !!rcu_dereference(rt->from);
- rcu_read_unlock();
-
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU || from_set);
+ (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
--
2.20.1
^ permalink raw reply related
* Re: [RFC v1 17/19] RDMA/irdma: Add ABI definitions
From: Jason Gunthorpe @ 2019-02-15 17:16 UTC (permalink / raw)
To: Shiraz Saleem
Cc: dledford, davem, linux-rdma, netdev, mustafa.ismail,
jeffrey.t.kirsher
In-Reply-To: <20190215171107.6464-18-shiraz.saleem@intel.com>
On Fri, Feb 15, 2019 at 11:11:04AM -0600, Shiraz Saleem wrote:
> From: Mustafa Ismail <mustafa.ismail@intel.com>
>
> Add ABI definitions for irdma.
>
> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
> include/uapi/rdma/irdma-abi.h | 140 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 140 insertions(+)
> create mode 100644 include/uapi/rdma/irdma-abi.h
>
> diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h
> new file mode 100644
> index 0000000..5b0e2d5
> +++ b/include/uapi/rdma/irdma-abi.h
> @@ -0,0 +1,140 @@
> +/*
> + * Copyright (c) 2006 - 2019 Intel Corporation. All rights reserved.
> + * Copyright (c) 2005 Topspin Communications. All rights reserved.
> + * Copyright (c) 2005 Cisco Systems. All rights reserved.
> + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses. You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * - Redistributions of source code must retain the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer.
> + *
> + * - Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer in the documentation and/or other materials
> + * provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +#ifndef IRDMA_ABI_H
> +#define IRDMA_ABI_H
> +
> +#include <linux/types.h>
> +
> +#define IRDMA_ABI_VER 6
Starting with high numbers?
> +enum irdma_memreg_type {
> + IW_MEMREG_TYPE_MEM = 0,
> + IW_MEMREG_TYPE_QP = 1,
> + IW_MEMREG_TYPE_CQ = 2,
> + IW_MEMREG_TYPE_RSVD = 3,
> + IW_MEMREG_TYPE_MW = 4,
> +};
> +
> +struct irdma_alloc_ucontext_req {
> + __u32 rsvd32;
> + __u8 userspace_ver;
> + __u8 rsvd8[3];
> +};
> +
> +struct irdma_alloc_ucontext_resp {
> + __u8 kernel_ver;
> + __u8 rsvd[7];
> + struct irdma_hw_attrs hw_attrs;
This won't even compile like this - don't forget you have to send the
rdma-core PR along with the kernel patches. You should already be
running the travis checks yourself. rdma-core should detect malformed
user space headers..
> +struct irdma_mem_reg_req {
> + __u16 reg_type; /* Memory, QP or CQ */
> + __u16 cq_pages;
> + __u16 rq_pages;
> + __u16 sq_pages;
> +};
New structs should be aligned to 8 bytes.
> +struct i40iw_create_qp_resp {
> + __u32 qp_id;
> + __u32 actual_sq_size;
> + __u32 actual_rq_size;
> + __u32 i40iw_drv_opt;
> + __u16 push_idx;
> + __u8 lsmm;
> + __u8 rsvd2;
> +};
ditto
> +struct irdma_create_ah_resp {
> + __u32 ah_id;
> + __u32 rsvd[4];
typo? __u8?
Jason
^ permalink raw reply
* Re: [RFC 12/19] RDMA/irdma: Implement device supported verb APIs
From: Shiraz Saleem @ 2019-02-15 17:18 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: dledford, davem, linux-rdma, netdev, mustafa.ismail,
jeffrey.t.kirsher
In-Reply-To: <20190212222707.GU24692@ziepe.ca>
On Tue, Feb 12, 2019 at 03:27:07PM -0700, Jason Gunthorpe wrote:
> On Tue, Feb 12, 2019 at 03:43:55PM -0600, Shiraz Saleem wrote:
>
> > +/**
> > + * irdma_disassociate_ucontext - Disassociate user context
> > + * @context: ib user context
> > + */
> > +static void irdma_disassociate_ucontext(struct ib_ucontext *context)
> > +{
> > + struct irdma_ucontext *ucontext = to_ucontext(context);
> > +
> > + struct irdma_vma_data *vma_data, *n;
> > + struct vm_area_struct *vma;
> > +
> > + irdma_dev_info(&ucontext->iwdev->rf->sc_dev, "called\n");
> > + mutex_lock(&ucontext->vma_list_mutex);
> > + list_for_each_entry_safe(vma_data, n, &ucontext->vma_list, list) {
> > + vma = vma_data->vma;
> > + zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
> > +
> > + vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
> > + vma->vm_ops = NULL;
> > + list_del(&vma_data->list);
> > + kfree(vma_data);
> > + }
> > + mutex_unlock(&ucontext->vma_list_mutex);
> > +}
>
> You need to study all the changes that have been done in the core code
> and make sure this driver is using all the latest stuff, I do not want
> to review a driver and find it is full of obsolete APIs like this
> above.
>
I submitted a revised version addressing this problem. There were a few changes.
*updated disassociate_ucontext API
*updated alloc/dealloc PD APIs
*query ports via core
*remove MODULE_VER
*remove create_single_threaded_workqueue
*remove list_empty checks in irdma_dealloc_ucontext
*change all uapi headers to use __aligned_u64
Shiraz
^ permalink raw reply
* Re: [B.A.T.M.A.N.] [PATCH v2] batman-adv: Add multicast-to-unicast support for multiple targets
From: Linus Lüssing @ 2019-02-15 17:20 UTC (permalink / raw)
To: The list for a Better Approach To Mobile Ad-hoc Networking
Cc: netdev, Jiri Pirko, David Bauer
In-Reply-To: <11497659.iQavgr9jVl@bentobox>
On Thu, Feb 14, 2019 at 03:04:54PM +0100, Sven Eckelmann wrote:
> On Thursday, 14 February 2019 14:44:52 CET Linus Lüssing wrote:
> [...]
> > > No new sysfs config files.
> >
> > Why? The bridge for instance does the same.
>
> https://patchwork.open-mesh.org/patch/16763/ - here the quote
>
> On Samstag, 29. Oktober 2016 12:33:01 CEST Jiri Pirko wrote:
> > I strongly believe it is a huge mistake to use sysfs for things like
> > this. This should be done via generic netlink api.
>
> We don't need to configuration interfaces - we only need the preferred one. If
> this is sysfs for you guys then we should not have started with generic
> netlink at all. And why wasn't this brought up now *after* the stuff was
> merged by David. It isn't the first time that I've stated clearly that there
> should be no new sysfs configuration files when we switch to genl.
>
> If it now preferred to have sysfs again for configuration then please discuss
> it with the netdev folks and find out how the new generic netlink interface
> can be removed again before the next release.
>
> Kind regards,
> Sven
Sorry, then this is all my misunderstanding. I have no issue with
removing the sysfs part from this patch (I liked sysfs for
prototyping/testing/scripting, but as we have all configuration
options available in batctl that works for me, too).
Thanks for the clarifications.
Regards, Linus
^ permalink raw reply
* Re: [RFC v1 00/19] Add unified Intel Ethernet RDMA driver (irdma)
From: Jason Gunthorpe @ 2019-02-15 17:20 UTC (permalink / raw)
To: Shiraz Saleem
Cc: dledford, davem, linux-rdma, netdev, mustafa.ismail,
jeffrey.t.kirsher
In-Reply-To: <20190215171107.6464-1-shiraz.saleem@intel.com>
On Fri, Feb 15, 2019 at 11:10:47AM -0600, Shiraz Saleem wrote:
> The unified driver is currently being tested on X722 with rdma-core-v22.
> X722 is supported by this driver, but only if CONFIG_INFINIBAND_I40IW
> is disabled in the kernel. We desire to deprecate or phase out i40iw
> from the kernel and can submit patches to do the same. We would like to
> move forward with this unified driver model for current and future HW.
You need to do better than this. rdma-core should be set up to detect
which of the kernel drivers is present and do the right thing. This
may require some new infrastructure work, but siw and rxe need this
capability as well.
> This series was built against rdma-next.
> commit 21a428a019c9 ("RDMA: Handle PD allocations by IB/core")
> It includes both net and rdma patches for the purposes
> of review. It will be split into two separate patch series, one for
> net-next and rdma-next once RFC is accepted.
>
> [1] https://patchwork.kernel.org/patch/10419583/
Be aware we cannot take net-next into rdma. You would be best to
submit a clean -rcX based PR to both net and RDMA for your net
portion.
Jason
^ permalink raw reply
* Re: [RFC v1 01/19] net/i40e: Add peer register/unregister to struct i40e_netdev_priv
From: Jason Gunthorpe @ 2019-02-15 17:22 UTC (permalink / raw)
To: Shiraz Saleem
Cc: dledford, davem, linux-rdma, netdev, mustafa.ismail,
jeffrey.t.kirsher
In-Reply-To: <20190215171107.6464-2-shiraz.saleem@intel.com>
On Fri, Feb 15, 2019 at 11:10:48AM -0600, Shiraz Saleem wrote:
> Expose the register/unregister function pointers in the struct
> i40e_netdev_priv which is accesible via the netdev_priv() interface
> in the RDMA driver. On a netdev notification in the RDMA driver,
> the appropriate LAN driver register/unregister functions are invoked
> from the struct i40e_netdev_priv structure,
Why? In later patches we get an entire device_add() based thing. Why do
you need two things?
The RDMA driver should bind to the thing that device_add created and
from there reliably get the netdev. It should not listen to netdev
notifiers for attachment.
It would be excellent if you could make this more general as pretty
much every single RDMA driver has some open coded (and often wrongly
locked) version of this attachment process.
This series is very big, so if you can see a way to make a general
attachment scheme based around device_add/etc it would be a great
pre-cursor series.
Jason
^ permalink raw reply
* RE: [PATCH net-next 10/13] net: mvpp2: reset the XPCS while reconfiguring the serdes lanes
From: Stefan Chulski @ 2019-02-15 17:23 UTC (permalink / raw)
To: Russell King - ARM Linux admin, Antoine Tenart
Cc: davem@davemloft.net, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, thomas.petazzoni@bootlin.com,
maxime.chevallier@bootlin.com, gregory.clement@bootlin.com,
miquel.raynal@bootlin.com, Nadav Haklai, Yan Markman,
mw@semihalf.com
In-Reply-To: <20190215171224.sjfrid5csseywuks@shell.armlinux.org.uk>
> On Fri, Feb 15, 2019 at 04:32:38PM +0100, Antoine Tenart wrote:
> > The documentation advises to set the XPCS in reset while reconfiguring
> > the serdes lanes. This seems to be a good thing to do, but the PPv2
> > driver wasn't doing it. This patch fixes it.
>
> Hmm. That statment seems to have some ambiguity in it - we do two
> "reconfigurations" - one may be upon initialisation, where the lane is already
> configured for 10Gbase-KR, and we're re-initialising it for the same mode.
> The other case is when we're switching between 10Gbase-KR and SGMII, or
> as will be the case with 2.5G support for the Alaska PHYs, 2500base-X.
There is one mode that we should add to PPv2 & COMPHY: 5Gbase-KR (5GBASE-T).
Stefan,
Best Regards.
^ permalink raw reply
* Re: [PATCH v2 perf,bpf 08/11] perf, bpf: save btf information as headers to perf.data
From: Song Liu @ 2019-02-15 17:25 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Netdev, linux-kernel, Alexei Starovoitov,
daniel@iogearbox.net, Kernel Team, peterz@infradead.org,
namhyung@kernel.org, acme@kernel.org
In-Reply-To: <20190215142643.GC5784@redhat.com>
> On Feb 15, 2019, at 6:26 AM, Arnaldo Carvalho de Melo <acme@redhat.com> wrote:
>
> Em Thu, Feb 14, 2019 at 04:00:09PM -0800, Song Liu escreveu:
>> This patch enables perf-record to save btf information as headers to
>> perf.data A new header type HEADER_BTF is introduced for this data.
>
> Jiri,
>
> Wouldn't it be better for this HEADER_BTF to be introduced
> already as an user space event, Song, see:
>
> tools/perf/util/event.h
>
> and:
>
> tools/perf/util/event.c
>
> perf_event__synthesize_cpu_map()
>
> - Arnaldo
>
BTF would be short-lived for short-lived BPF programs. I guess
saving them as headers is easier than merging them with samples.
What's the benefit of saving them as user space events?
Thanks,
Song
>
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> tools/perf/util/header.c | 99 +++++++++++++++++++++++++++++++++++++++-
>> tools/perf/util/header.h | 1 +
>> 2 files changed, 99 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
>> index 2ae76a9d06f6..3f1562afe8e5 100644
>> --- a/tools/perf/util/header.c
>> +++ b/tools/perf/util/header.c
>> @@ -1125,6 +1125,45 @@ static int write_bpf_prog_info(struct feat_fd *ff,
>> return ret;
>> }
>>
>> +static int write_btf(struct feat_fd *ff,
>> + struct perf_evlist *evlist __maybe_unused)
>> +{
>> + struct perf_env *env = &ff->ph->env;
>> + struct rb_root *root;
>> + struct rb_node *next;
>> + u32 count = 0;
>> + int ret;
>> +
>> + down_read(&env->bpf_info_lock);
>> +
>> + root = &env->btfs;
>> + next = rb_first(root);
>> + while (next) {
>> + ++count;
>> + next = rb_next(next);
>> + }
>> +
>> + ret = do_write(ff, &count, sizeof(count));
>> +
>> + if (ret < 0)
>> + goto out;
>> +
>> + next = rb_first(root);
>> + while (next) {
>> + struct btf_node *node;
>> +
>> + node = rb_entry(next, struct btf_node, rb_node);
>> + next = rb_next(&node->rb_node);
>> + ret = do_write(ff, node,
>> + sizeof(struct btf_node) + node->data_size);
>> + if (ret < 0)
>> + goto out;
>> + }
>> +out:
>> + up_read(&env->bpf_info_lock);
>> + return ret;
>> +}
>> +
>> static int cpu_cache_level__sort(const void *a, const void *b)
>> {
>> struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
>> @@ -1628,6 +1667,28 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
>> up_read(&env->bpf_info_lock);
>> }
>>
>> +static void print_btf(struct feat_fd *ff, FILE *fp)
>> +{
>> + struct perf_env *env = &ff->ph->env;
>> + struct rb_root *root;
>> + struct rb_node *next;
>> +
>> + down_read(&env->bpf_info_lock);
>> +
>> + root = &env->btfs;
>> + next = rb_first(root);
>> +
>> + while (next) {
>> + struct btf_node *node;
>> +
>> + node = rb_entry(next, struct btf_node, rb_node);
>> + next = rb_next(&node->rb_node);
>> + fprintf(fp, "# bpf_prog_info of id %u\n", node->id);
>> + }
>> +
>> + up_read(&env->bpf_info_lock);
>> +}
>> +
>> static void free_event_desc(struct perf_evsel *events)
>> {
>> struct perf_evsel *evsel;
>> @@ -2723,6 +2784,41 @@ static int process_bpf_prog_info(struct feat_fd *ff,
>> return err;
>> }
>>
>> +static int process_btf(struct feat_fd *ff, void *data __maybe_unused)
>> +{
>> + struct perf_env *env = &ff->ph->env;
>> + u32 count, i;
>> +
>> + if (do_read_u32(ff, &count))
>> + return -1;
>> +
>> + down_write(&env->bpf_info_lock);
>> +
>> + for (i = 0; i < count; ++i) {
>> + struct btf_node btf_node;
>> + struct btf_node *node;
>> +
>> + if (__do_read(ff, &btf_node, sizeof(struct btf_node)))
>> + return -1;
>> +
>> + node = malloc(sizeof(struct btf_node) + btf_node.data_size);
>> + if (!node)
>> + return -1;
>> +
>> + node->id = btf_node.id;
>> + node->data_size = btf_node.data_size;
>> +
>> + if (__do_read(ff, node->data, btf_node.data_size)) {
>> + free(node);
>> + return -1;
>> + }
>> + perf_env__insert_btf(env, node);
>> + }
>> +
>> + up_write(&env->bpf_info_lock);
>> + return 0;
>> +}
>> +
>> struct feature_ops {
>> int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
>> void (*print)(struct feat_fd *ff, FILE *fp);
>> @@ -2783,7 +2879,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
>> FEAT_OPR(SAMPLE_TIME, sample_time, false),
>> FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
>> FEAT_OPR(CLOCKID, clockid, false),
>> - FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false)
>> + FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
>> + FEAT_OPR(BTF, btf, false)
>> };
>>
>> struct header_print_data {
>> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
>> index 0785c91b4c3a..ba51d8e43c53 100644
>> --- a/tools/perf/util/header.h
>> +++ b/tools/perf/util/header.h
>> @@ -40,6 +40,7 @@ enum {
>> HEADER_MEM_TOPOLOGY,
>> HEADER_CLOCKID,
>> HEADER_BPF_PROG_INFO,
>> + HEADER_BTF,
>> HEADER_LAST_FEATURE,
>> HEADER_FEAT_BITS = 256,
>> };
>> --
>> 2.17.1
^ permalink raw reply
* Re: [RFC v1 12/19] RDMA/irdma: Implement device supported verb APIs
From: Jason Gunthorpe @ 2019-02-15 17:35 UTC (permalink / raw)
To: Shiraz Saleem
Cc: dledford, davem, linux-rdma, netdev, mustafa.ismail,
jeffrey.t.kirsher
In-Reply-To: <20190215171107.6464-13-shiraz.saleem@intel.com>
On Fri, Feb 15, 2019 at 11:10:59AM -0600, Shiraz Saleem wrote:
> +static int irdma_alloc_pd(struct ib_pd *pd,
> + struct ib_ucontext *context,
> + struct ib_udata *udata)
> +{
> + struct irdma_pd *iwpd = to_iwpd(pd);
> + struct irdma_device *iwdev = to_iwdev(pd->device);
> + struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
> + struct irdma_pci_f *rf = iwdev->rf;
> + struct irdma_alloc_pd_resp uresp = {};
> + struct irdma_sc_pd *sc_pd;
> + struct irdma_ucontext *ucontext;
> + u32 pd_id = 0;
> + int err;
> +
> + if (iwdev->closing)
> + return -ENODEV;
No crazy unlocked 'closing' flags. The core code takes care of
everything a driver needs to worry about if you use it properly.
> +/**
> + * irdma_create_cq - create cq
> + * @ibdev: device pointer from stack
> + * @attr: attributes for cq
> + * @context: user context created during alloc
> + * @udata: user data
> + */
> +static struct ib_cq *irdma_create_cq(struct ib_device *ibdev,
> + const struct ib_cq_init_attr *attr,
> + struct ib_ucontext *context,
> + struct ib_udata *udata)
> +{
> + struct irdma_device *iwdev = to_iwdev(ibdev);
> + struct irdma_pci_f *rf = iwdev->rf;
> + struct irdma_cq *iwcq;
> + struct irdma_pbl *iwpbl;
> + u32 cq_num = 0;
> + struct irdma_sc_cq *cq;
> + struct irdma_sc_dev *dev = &rf->sc_dev;
> + struct irdma_cq_init_info info = {};
> + enum irdma_status_code status;
> + struct irdma_cqp_request *cqp_request;
> + struct cqp_cmds_info *cqp_info;
> + struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
> + unsigned long flags;
> + int err_code;
> + int entries = attr->cqe;
> +
> + if (iwdev->closing)
> + return ERR_PTR(-ENODEV);
> +
> + if (entries > rf->max_cqe)
> + return ERR_PTR(-EINVAL);
> +
> + iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
> + if (!iwcq)
> + return ERR_PTR(-ENOMEM);
> +
> + err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs,
> + rf->max_cq, &cq_num,
> + &rf->next_cq);
> + if (err_code)
> + goto error;
> +
> + cq = &iwcq->sc_cq;
> + cq->back_cq = (void *)iwcq;
> + spin_lock_init(&iwcq->lock);
> + info.dev = dev;
> + ukinfo->cq_size = max(entries, 4);
> + ukinfo->cq_id = cq_num;
> + iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
> + if (attr->comp_vector < rf->ceqs_count)
> + info.ceq_id = attr->comp_vector;
> + info.ceq_id_valid = true;
> + info.ceqe_mask = 1;
> + info.type = IRDMA_CQ_TYPE_IWARP;
> + info.vsi = &iwdev->vsi;
> +
> + if (context) {
Drivers should rarely write 'if context'. The test for userspaceness
is 'if (udata)' - and in this case context is guaranteed. Lots of
places with this wrong..
Also this will need to be rebased as this all changed.
> + return (struct ib_cq *)iwcq;
And don't write casts like that, &iwcq->ib_qp or something.
Find and fix them all please.
> +/**
> + * irdma_set_page - populate pbl list for fmr
> + * @ibmr: ib mem to access iwarp mr pointer
> + * @addr: page dma address fro pbl list
> + */
> +static int irdma_set_page(struct ib_mr *ibmr,
> + u64 addr)
Can you please read through this giant driver and hit various places
with wonky formatting with clang-format? We don't need to start out a
new driver with bonkers indentation.
> +
> +static const struct ib_device_ops irdma_roce_dev_ops = {
> + .get_link_layer = irdma_get_link_layer,
> + .query_ah = irdma_query_ah,
> + .attach_mcast = irdma_attach_mcast,
> + .detach_mcast = irdma_detach_mcast,
> + .query_gid = irdma_query_gid_roce,
> + .modify_qp = irdma_modify_qp_roce,
> +};
> +
> +static const struct ib_device_ops irdma_iw_dev_ops = {
> + .query_gid = irdma_query_gid,
> + .modify_qp = irdma_modify_qp,
> +};
> +
> +static const struct ib_device_ops irdma_dev_ops = {
> + .get_port_immutable = irdma_port_immutable,
> + .get_netdev = irdma_get_netdev,
> + .query_port = irdma_query_port,
> + .modify_port = irdma_modify_port,
> + .query_pkey = irdma_query_pkey,
> + .alloc_ucontext = irdma_alloc_ucontext,
> + .dealloc_ucontext = irdma_dealloc_ucontext,
> + .mmap = irdma_mmap,
> + .alloc_pd = irdma_alloc_pd,
> + .dealloc_pd = irdma_dealloc_pd,
> + .create_qp = irdma_create_qp,
> + .query_qp = irdma_query_qp,
> + .destroy_qp = irdma_destroy_qp,
> + .create_cq = irdma_create_cq,
> + .destroy_cq = irdma_destroy_cq,
> + .get_dma_mr = irdma_get_dma_mr,
> + .reg_user_mr = irdma_reg_user_mr,
> + .dereg_mr = irdma_dereg_mr,
> + .alloc_mw = irdma_alloc_mw,
> + .dealloc_mw = irdma_dealloc_mw,
> + .alloc_hw_stats = irdma_alloc_hw_stats,
> + .get_hw_stats = irdma_get_hw_stats,
> + .query_device = irdma_query_device,
> + .create_ah = irdma_create_ah,
> + .destroy_ah = irdma_destroy_ah,
> + .drain_sq = irdma_drain_sq,
> + .drain_rq = irdma_drain_rq,
> + .alloc_mr = irdma_alloc_mr,
> + .map_mr_sg = irdma_map_mr_sg,
> + .get_dev_fw_str = irdma_get_dev_fw_str,
> + .poll_cq = irdma_poll_cq,
> + .req_notify_cq = irdma_req_notify_cq,
> + .post_send = irdma_post_send,
> + .post_recv = irdma_post_recv,
> + .disassociate_ucontext = irdma_disassociate_ucontext,
> + INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
> +};
All lists of things should be sorted. I saw many examples of unsorted
lists.
> +/**
> + * irdma_init_roce_device - initialization of iwarp rdma device
> + * @iwibdev: irdma ib device
> + */
> +static int irdma_init_iw_device(struct irdma_ib_device *iwibdev)
> +{
> + struct net_device *netdev = iwibdev->iwdev->netdev;
> +
> + iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
> + ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
> + iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
> + if (!iwibdev->ibdev.iwcm)
> + return -ENOMEM;
> +
> + iwibdev->ibdev.iwcm->add_ref = irdma_add_ref;
> + iwibdev->ibdev.iwcm->rem_ref = irdma_rem_ref;
> + iwibdev->ibdev.iwcm->get_qp = irdma_get_qp;
> + iwibdev->ibdev.iwcm->connect = irdma_connect;
> + iwibdev->ibdev.iwcm->accept = irdma_accept;
> + iwibdev->ibdev.iwcm->reject = irdma_reject;
> + iwibdev->ibdev.iwcm->create_listen = irdma_create_listen;
> + iwibdev->ibdev.iwcm->destroy_listen = irdma_destroy_listen;
Huh. These should probably be moved into the ops structure too.
> +/**
> + * irdma_register_rdma_device - register iwarp device to IB
> + * @iwdev: iwarp device
> + */
> +int irdma_register_rdma_device(struct irdma_device *iwdev)
> +{
> + int ret;
> + struct irdma_ib_device *iwibdev;
> +
> + ret = irdma_init_rdma_device(iwdev);
> + if (ret)
> + return ret;
> +
> + iwibdev = iwdev->iwibdev;
> + rdma_set_device_sysfs_group(&iwibdev->ibdev, &irdma_attr_group);
> + if (iwdev->rf->sc_dev.hw_attrs.hw_rev == IRDMA_GEN_1)
> + /* backward compat with old user-space libi40iw */
> + iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
Really? Then what is the problem in rdma-core?
Why are we getting a replacement driver instead of fixing the old one?
This is very long, I didn't read it super closely :(
Jason
^ permalink raw reply
* Re: [PATCH net] net: validate untrusted gso packets without csum offload
From: Eric Dumazet @ 2019-02-15 17:36 UTC (permalink / raw)
To: Willem de Bruijn, netdev
Cc: davem, edumazet, jasowang, maximmi, Willem de Bruijn, syzbot
In-Reply-To: <20190215171547.247018-1-willemdebruijn.kernel@gmail.com>
On 02/15/2019 09:15 AM, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Syzkaller again found a path to a kernel crash through bad gso input.
> By building an excessively large packet to cause an skb field to wrap.
>
> If VIRTIO_NET_HDR_F_NEEDS_CSUM was set this would have been dropped in
> skb_partial_csum_set.
>
> GSO packets that do not set checksum offload are suspicious and rare.
> Most callers of virtio_net_hdr_to_skb already pass them to
> skb_probe_transport_header.
>
> Move that test forward, change it to detect parse failure and drop
> packets on failure as those cleary are not one of the legitimate
> VIRTIO_NET_HDR_GSO types.
>
> Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.")
> Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr")
> Reported-by: syzbot <syzkaller@googlegroups.com>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* Re: [PATCH RESEND net] net: phy: xgmiitorgmii: Support generic PHY status read
From: Florian Fainelli @ 2019-02-15 17:38 UTC (permalink / raw)
To: Paul Kocialkowski, netdev, linux-arm-kernel, linux-kernel
Cc: Andrew Lunn, Heiner Kallweit, David S . Miller, Michal Simek,
Thomas Petazzoni
In-Reply-To: <20190215163220.20041-1-paul.kocialkowski@bootlin.com>
On 2/15/19 8:32 AM, Paul Kocialkowski wrote:
> Some PHY drivers like the generic one do not provide a read_status
> callback on their own but rely on genphy_read_status being called
> directly.
>
> With the current code, this results in a NULL function pointer call.
> Call genphy_read_status instead when there is no specific callback.
>
> Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support")
> Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
> ---
> Added Fixes tag and net label for resend.
You would want to use phy_read_status() which encapsulates that check as
well as checks that the phy_drv pointer is not NULL.
>
> drivers/net/phy/xilinx_gmii2rgmii.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
> index 74a8782313cf..bd6084e315de 100644
> --- a/drivers/net/phy/xilinx_gmii2rgmii.c
> +++ b/drivers/net/phy/xilinx_gmii2rgmii.c
> @@ -44,7 +44,10 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
> u16 val = 0;
> int err;
>
> - err = priv->phy_drv->read_status(phydev);
> + if (priv->phy_drv->read_status)
> + err = priv->phy_drv->read_status(phydev);
> + else
> + err = genphy_read_status(phydev);
> if (err < 0)
> return err;
>
>
--
Florian
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox