* [PATCH] {NET,IB}/mlx4: 64 byte CQE/EQE support
@ 2012-05-24 14:30 Or Gerlitz
[not found] ` <1337869844-30259-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Or Gerlitz @ 2012-05-24 14:30 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Or Gerlitz, Eli Cohen,
Jack Morgenstein, Yevgeny Petrilin, Liran Liss, Tzahi Oved
CX3 devices can work with 64 or 32 byte CQEs/EQEs. Using 64 byte
EQEs/CQEs allow better utilization of new chipsets and gaining higher
performance. This patch queries the HCA's capabilities and if it
supports BOTH 64 byte CQEs and EQES will configure the HW to work
in 64 byte mode. Note that the 32B vs 64B working mode is global,
per HCA and not per CQ or EQ.
Since this mode is global, userspace (libmlx4) must be updated to
work with the configured CQE size, and similarily under SRIOV, guests
that use ConnectX virtual functions need to know both EQE and CQE size.
The patch makes sure that older guest drivers who follows the
QUERY_DEV_FUNC command (e.g as done in mlx4_core of Linux 3.3/3.4)
will notice that they need an update to be able to work with the
PPF since the returned pf_context_behaviour will not be zero any more.
User space notification is done through a new field introduced
in struct mlx4_ib_ucontext which holds device capabilities for
which user space must take action. This changes the binary interface so
the ABI is bumped from 3 to 4 but only when **needed** e.g only when the
driver does use 64B CQEs or future device capabilities which must be
in sync by user space. This would allow to work with unmodified libmlx4
on older devices (e.g A0, B0) which don't support 64 byte cookies.
Signed-off-by: Eli Cohen <eli-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
pointers to V0
mlx4 http://marc.info/?l=linux-rdma&m=131805712306677&w=2
libmlx4 http://marc.info/?l=linux-rdma&m=131805712306678&w=2
changes from V0
- unified the 64B CQE and EQE patches to one patch which takes an approach of
apply both or none, under the thinking that 99% FW will support both or none.
- bump the ABI version towards user-space/libmlx4 only when needed and not always
- added support for SRIOV, using the PF_CONTEXT_BEHAVIOUR_MASK mechanism of
the query func capabilities command, modified "sizeof (struct mlx4_eqe)"
to be 32 or 64 in mlx4_multi_func_init() and slave_event().
drivers/infiniband/hw/mlx4/cq.c | 33 ++++++++++++++++++-----
drivers/infiniband/hw/mlx4/main.c | 26 +++++++++++++++---
drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 +
drivers/infiniband/hw/mlx4/user.h | 15 ++++++++++-
drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_cq.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 +
drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 ++-
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 5 ++-
drivers/net/ethernet/mellanox/mlx4/eq.c | 25 +++++++++++------
drivers/net/ethernet/mellanox/mlx4/fw.c | 11 +++++++-
drivers/net/ethernet/mellanox/mlx4/main.c | 19 +++++++++++++-
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 +
include/linux/mlx4/device.h | 18 ++++++++++++-
14 files changed, 133 insertions(+), 31 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 6d4ef71..786663c 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
- return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+ return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+ struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
- return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}
@@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
{
int err;
- err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+ err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
PAGE_SIZE * 2, &buf->buf);
if (err)
goto out;
+ buf->entry_size = dev->dev->caps.cqe_size;
err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
&buf->mtt);
if (err)
@@ -120,7 +122,7 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &buf->mtt);
err_buf:
- mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
+ mlx4_buf_free(dev->dev, nent * buf->entry_size,
&buf->buf);
out:
@@ -129,7 +131,7 @@ out:
static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
{
- mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+ mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -137,8 +139,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
u64 buf_addr, int cqe)
{
int err;
+ int cqe_size = dev->dev->caps.cqe_size;
- *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+ *umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -331,16 +334,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
+ int cqe_size = cq->buf.entry_size;
+ int cqe_inc = cqe_size == 64 ? 1 : 0;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
+ cqe += cqe_inc;
+
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
(i + 1) & cq->resize_buf->cqe);
- memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+ memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+ new_cqe += cqe_inc;
+
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+ cqe += cqe_inc;
}
++cq->mcq.cons_index;
}
@@ -438,6 +448,7 @@ err_buf:
out:
mutex_unlock(&cq->resize_mutex);
+
return err;
}
@@ -565,6 +576,9 @@ repoll:
if (!cqe)
return -EAGAIN;
+ if (cq->buf.entry_size == 64)
+ cqe++;
+
++cq->mcq.cons_index;
/*
@@ -778,6 +792,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
int nfreed = 0;
struct mlx4_cqe *cqe, *dest;
u8 owner_bit;
+ int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
/*
* First we need to find the current producer index, so we
@@ -796,12 +811,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
*/
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+ cqe += cqe_inc;
+
if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
++nfreed;
} else if (nfreed) {
dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+ dest += cqe_inc;
+
owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
memcpy(dest, cqe, sizeof *cqe);
dest->owner_sr_opcode = owner_bit |
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ee1c577..e93d822 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -510,15 +510,23 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- resp.qp_tab_size = dev->dev->caps.num_qps;
- resp.bf_reg_size = dev->dev->caps.bf_reg_size;
- resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+ resp_v3.qp_tab_size = dev->dev->caps.num_qps;
+ resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ } else {
+ resp.dev_caps = dev->dev->caps.userspace_caps;
+ resp.qp_tab_size = dev->dev->caps.num_qps;
+ resp.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ }
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
@@ -533,7 +541,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- err = ib_copy_to_udata(udata, &resp, sizeof resp);
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+ err = ib_copy_to_udata(udata, &resp_v3, sizeof resp_v3);
+ else
+ err = ib_copy_to_udata(udata, &resp, sizeof resp);
+
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
@@ -1209,7 +1221,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ if (dev->caps.userspace_caps)
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ else
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e62297c..1321325 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -66,6 +66,7 @@ struct mlx4_ib_xrcd {
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
+ int entry_size;
};
struct mlx4_ib_cq_resize {
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 13beede..07afc93 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -40,7 +40,13 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define MLX4_IB_UVERBS_ABI_VERSION 3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
+#define MLX4_IB_UVERBS_ABI_VERSION 4
+
+enum {
+ MLX4_64_BYTE_CQE = 1 << 0
+};
/*
* Make sure that all structs defined in this file remain laid out so
@@ -50,7 +56,14 @@
* instead.
*/
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+ __u32 qp_tab_size;
+ __u16 bf_reg_size;
+ __u16 bf_regs_per_page;
+};
+
struct mlx4_ib_alloc_ucontext_resp {
+ __u32 dev_caps;
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 1bcead1..50caff3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1517,7 +1517,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
spin_lock_init(&s_state->lock);
}
- memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+ memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
INIT_WORK(&priv->mfunc.master.comm_work,
mlx4_master_comm_channel);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 908a460..a096c64 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
int err;
cq->size = entries;
- cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+ cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
cq->ring = ring;
cq->is_tx = mode;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 926d8aa..235f147 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1120,6 +1120,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
goto out;
}
priv->rx_ring_num = prof->rx_ring_num;
+ priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
priv->mac_index = -1;
priv->msg_enable = MLX4_EN_MSG_LEVEL;
spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index d49a7ac..b5387f6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -541,6 +541,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int ip_summed;
struct ethhdr *ethh;
u64 s_mac;
+ int factor = priv->cqe_factor;
if (!priv->port_up)
return 0;
@@ -549,7 +550,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
* descriptor offset can be deduced from the CQE index instead of
* reading 'cqe->index' */
index = cq->mcq.cons_index & ring->size_mask;
- cqe = &cq->buf[index];
+ cqe = &cq->buf[(index << factor) + factor];
/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -680,7 +681,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
next:
++cq->mcq.cons_index;
index = (cq->mcq.cons_index) & ring->size_mask;
- cqe = &cq->buf[index];
+ cqe = &cq->buf[(index << factor) + factor];
if (++polled == budget) {
/* We are here because we reached the NAPI budget -
* flush only pending LRO sessions */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 019d856..5a6d468 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -317,12 +317,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
struct mlx4_cqe *buf = cq->buf;
u32 packets = 0;
u32 bytes = 0;
+ int factor = priv->cqe_factor;
if (!priv->port_up)
return;
index = cons_index & size_mask;
- cqe = &buf[index];
+ cqe = &buf[(index << factor) + factor];
ring_index = ring->cons & size_mask;
/* Process all completed CQEs */
@@ -351,7 +352,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
++cons_index;
index = cons_index & size_mask;
- cqe = &buf[index];
+ cqe = &buf[(index << factor) + factor];
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 3b6f8ef..1f641c1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -91,15 +91,20 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
mb();
}
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
{
- unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
- return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+ /* (entry & (eq->nent - 1)) gives us a cyclic array */
+ unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+ /* CX3 is capable of extending the EQE from 32 to 64 bytes.
+ When this feature is enabled, the first (in the lower addresses)
+ 32 bytes in the 64 byte EQE are reserved and the next 32
+ bytes contain the legacy EQE information. */
+ return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
}
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
{
- struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+ struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
}
@@ -164,7 +169,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
return;
}
- memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
+ memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
s_eqe->slave_id = slave;
/* ensure all information is written before setting the ownersip bit */
wmb();
@@ -242,7 +247,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
u8 update_slave_state;
int i;
- while ((eqe = next_eqe_sw(eq))) {
+ while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -634,7 +639,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
- npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+ /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */
+ npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
@@ -736,8 +742,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int err;
- int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
int i;
+ /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */
+ int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 68f5cd6..5845862 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -109,6 +109,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
[41] = "Unicast VEP steering support",
[42] = "Multicast VEP steering support",
[48] = "Counters support",
+ [61] = "64 byte EQE support",
+ [62] = "64 byte CQE support",
};
int i;
@@ -198,7 +200,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
field = dev->caps.num_ports;
MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
- size = 0; /* no PF behavious is set for now */
+ size = dev->caps.function_caps;
MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
size = dev->caps.num_qps;
@@ -1059,6 +1061,13 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
+ /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE &&
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+ }
+
/* QPC/EEC/CQC/EQC/RDMARC attributes */
MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2e024a6..9628d48 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -95,7 +95,8 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
#define MLX4_VF (1 << 0)
#define HCA_GLOBAL_CAP_MASK 0
-#define PF_CONTEXT_BEHAVIOUR_MASK 0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
static char mlx4_version[] __devinitdata =
DRV_NAME ": Mellanox ConnectX core driver v"
@@ -374,6 +375,22 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
+ /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */
+ if (dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE &&
+ dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+ dev->caps.cqe_size = 64;
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.cqe_size = 32;
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE)
+ mlx4_err(dev, "64B CQEs supported but not 64B EQEs, ignoring\n");
+ else if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE)
+ mlx4_err(dev, "64B EQEs supported but not 64B CQEs, ignoring\n");
+ }
+
return 0;
}
/*The function checks if there are live vf, return the num of them*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 6ae3509..11fd287 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -463,6 +463,7 @@ struct mlx4_en_priv {
int mac_index;
unsigned max_mtu;
int base_qpn;
+ int cqe_factor;
struct mlx4_en_rss_map rss_map;
__be32 ctrl_flags;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6e27fa9..e6b59bc 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -95,7 +95,18 @@ enum {
MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41,
MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42,
MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48,
- MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55
+ MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55,
+ MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61,
+ MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62
+};
+
+
+enum {
+ MLX4_USER_DEV_CAP_64B_CQE = 1L << 0
+};
+
+enum {
+ MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0
};
enum {
@@ -319,6 +330,11 @@ struct mlx4_caps {
enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1];
u32 max_counters;
u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
+ u32 eqe_size;
+ u32 cqe_size;
+ u8 eqe_factor;
+ u32 userspace_caps; /* userspace must be aware to */
+ u32 function_caps; /* functions must be aware to */
};
struct mlx4_buf_list {
--
1.7.1
Cc: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
Cc: Yevgeny Petrilin <yevgenyp-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: Liran Liss <liranl-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Cc: Tzahi Oved <tzahio-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-05-29 19:09 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-24 14:30 [PATCH] {NET,IB}/mlx4: 64 byte CQE/EQE support Or Gerlitz
[not found] ` <1337869844-30259-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2012-05-29 14:41 ` Or Gerlitz
[not found] ` <4FC4E016.4070607-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2012-05-29 17:45 ` Roland Dreier
[not found] ` <CAL1RGDVibQjUE2Jh0JTc-Z_OxO8ekwAOVpFmCKmysF8M-2b4NQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2012-05-29 17:57 ` Jason Gunthorpe
[not found] ` <20120529175712.GB17863-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2012-05-29 18:00 ` Roland Dreier
2012-05-29 19:09 ` Or Gerlitz
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.