* [PATCH rdma-next v1 12/13] IB/mlx5: Add flow counters read support
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
Implements the flow counters read wrapper.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/main.c | 16 ++++++++++++++++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 11 +++++++++++
2 files changed, 27 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 3f1e957946e6..2044d9f69a83 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3150,7 +3150,21 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev,
}
}
+/*
+ * Query the flow counter referenced by read_attr->hw_cntrs_hndl and store
+ * its packets and bytes values in read_attr->out, at the IB_COUNTER_PACKETS
+ * and IB_COUNTER_BYTES slots respectively.
+ *
+ * Returns whatever mlx5_fc_query() returns.
+ */
+static int read_flow_counters(struct ib_device *ibdev,
+			      struct mlx5_read_counters_attr *read_attr)
+{
+	struct mlx5_fc *counter = (struct mlx5_fc *)read_attr->hw_cntrs_hndl;
+	u64 *packets = &read_attr->out[IB_COUNTER_PACKETS];
+	u64 *bytes = &read_attr->out[IB_COUNTER_BYTES];
+
+	return mlx5_fc_query(to_mdev(ibdev)->mdev, counter->id, packets, bytes);
+}
+
#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+/* flow counters currently expose two counters: packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+
static int counters_set_description(struct ib_counters *counters,
enum mlx5_ib_counters_type counters_type,
const void __user *cntrs_data,
@@ -3182,6 +3196,8 @@ static int counters_set_description(struct ib_counters *counters,
/* init the fields for the object */
mcounters->type = counters_type;
+ mcounters->read_counters = read_flow_counters;
+ mcounters->counters_num = FLOW_COUNTERS_NUM;
mcounters->ncounters = ncounters;
desc = mcounters->counters_data;
for (i = 0; i < ncounters * 2; i += 2) {
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 7313d3cd04f0..1baee579d84b 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -814,6 +814,12 @@ struct mlx5_memic {
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
};
+struct mlx5_read_counters_attr { /* argument bundle for a mlx5_ib_mcounters read_counters callback */
+ void *hw_cntrs_hndl; /* read_flow_counters() casts this to struct mlx5_fc * */
+ u64 *out; /* read_flow_counters() fills out[IB_COUNTER_PACKETS] and out[IB_COUNTER_BYTES] */
+ u32 flags; /* not read by read_flow_counters(); presumably enum ib_read_counters_flags - confirm */
+};
+
enum mlx5_ib_counters_type {
MLX5_IB_COUNTERS_FLOW,
};
@@ -821,7 +827,12 @@ enum mlx5_ib_counters_type {
struct mlx5_ib_mcounters {
struct ib_counters ibcntrs;
enum mlx5_ib_counters_type type;
+ /* number of counters supported for this counters type */
+ u32 counters_num;
void *hw_cntrs_hndl;
+ /* read function for this counters type */
+ int (*read_counters)(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr);
/* max index set as part of create_flow */
u32 cntrs_max_index;
/* number of counters data entries (<description,index> pair) */
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 11/13] IB/mlx5: Add flow counters binding support
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
Associates a counters object with a flow when IB_FLOW_SPEC_ACTION_COUNT
is part of the flow specifications.
The user-space placement of the counters, given as (index, description)
pairs, is passed as private data of the counters flow specification.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/main.c | 207 +++++++++++++++++++++++++++++++++--
drivers/infiniband/hw/mlx5/mlx5_ib.h | 15 +++
include/linux/mlx5/fs.h | 1 +
include/uapi/rdma/mlx5-abi.h | 14 +++
4 files changed, 225 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 18bfee86fa52..3f1e957946e6 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2472,7 +2472,7 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
#define LAST_DROP_FIELD size
-#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -2836,6 +2836,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ret)
return ret;
break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+ LAST_COUNTERS_FIELD))
+ return -EOPNOTSUPP;
+
+ /* for now support only one counters spec per flow */
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return -EINVAL;
+
+ action->counters = ib_spec->flow_count.counters;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
default:
return -EINVAL;
}
@@ -2983,6 +2995,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev,
}
}
+/*
+ * Drop the <description, index> table attached to @counters and reset the
+ * recorded maximum index. Takes mcntrs_mutex itself, so it must not be
+ * called with that mutex already held.
+ */
+static void counters_clear_description(struct ib_counters *counters)
+{
+	struct mlx5_ib_mcounters *mcntrs = to_mcounters(counters);
+	struct mutex *lock = &mcntrs->mcntrs_mutex;
+
+	mutex_lock(lock);
+
+	kfree(mcntrs->counters_data);
+	mcntrs->counters_data = NULL;
+	mcntrs->cntrs_max_index = 0;
+
+	mutex_unlock(lock);
+}
+
static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
{
struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
@@ -3004,6 +3027,10 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
put_flow_table(dev, handler->prio, true);
mutex_unlock(&dev->flow_db->lock);
+ if (handler->ibcounters &&
+ atomic_read(&handler->ibcounters->usecnt) == 1)
+ counters_clear_description(handler->ibcounters);
+
kfree(handler);
return 0;
@@ -3123,22 +3150,119 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev,
}
}
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+/*
+ * Copy the user-supplied <description, index> u32 pairs into @counters and
+ * record the highest index used (plus one) in cntrs_max_index.
+ *
+ * Must be called with mcounters->mcntrs_mutex held; flow_counters_set_data()
+ * takes it before calling here. For that reason the error path undoes its
+ * work in place instead of calling counters_clear_description(), which
+ * acquires the same (non-recursive) mutex and would self-deadlock.
+ *
+ * @counters:      counters object to describe
+ * @counters_type: only MLX5_IB_COUNTERS_FLOW is accepted
+ * @cntrs_data:    user pointer to @ncounters pairs of u32
+ * @ncounters:     number of pairs, at most MAX_COUNTERS_NUM
+ *
+ * Returns 0 on success, -EINVAL for an unsupported type, too many entries,
+ * an unknown description or an index whose "+ 1" would wrap, -ENOMEM on
+ * allocation failure and -EFAULT if the user buffer cannot be read.
+ */
+static int counters_set_description(struct ib_counters *counters,
+				    enum mlx5_ib_counters_type counters_type,
+				    const void __user *cntrs_data,
+				    u32 ncounters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+	u32 *desc;
+	int ret;
+	u32 i;
+
+	if (counters_type != MLX5_IB_COUNTERS_FLOW)
+		return -EINVAL;
+
+	if (ncounters > MAX_COUNTERS_NUM)
+		return -EINVAL;
+
+	/* each counter entry has both a description and an index */
+	desc = kcalloc(ncounters, sizeof(u32) * 2, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+	mcounters->counters_data = desc;
+
+	if (copy_from_user(desc, cntrs_data, sizeof(u32) * ncounters * 2)) {
+		ret = -EFAULT;
+		goto data_err;
+	}
+
+	/* init the fields for the object */
+	mcounters->type = counters_type;
+	mcounters->ncounters = ncounters;
+	for (i = 0; i < ncounters * 2; i += 2) {
+		/*
+		 * desc[i] is the description, desc[i + 1] the output index.
+		 * An index of U32_MAX would make "index + 1" wrap to 0 and
+		 * leave cntrs_max_index smaller than an index in use.
+		 */
+		if (desc[i] > IB_COUNTER_BYTES || desc[i + 1] == U32_MAX) {
+			ret = -EINVAL;
+			goto data_err;
+		}
+
+		if (mcounters->cntrs_max_index <= desc[i + 1])
+			mcounters->cntrs_max_index = desc[i + 1] + 1;
+	}
+
+	return 0;
+
+data_err:
+	/* mcntrs_mutex is held by the caller: clean up without re-locking */
+	kfree(desc);
+	mcounters->counters_data = NULL;
+	mcounters->cntrs_max_index = 0;
+
+	return ret;
+}
+
+/*
+ * Prepare @ibcounters for being attached to a flow that is being created.
+ *
+ * When @ucmd carries counters data, the counters must not be described yet
+ * (cntrs_max_index == 0) and the <description, index> pairs are installed.
+ * Without counters data the counters must already be described. In both
+ * cases the hardware flow counter is created on first use.
+ *
+ * Everything runs under mcounters->mcntrs_mutex.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int flow_counters_set_data(struct ib_counters *ibcounters,
+				  struct mlx5_ib_create_flow *ucmd)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+	bool desc_set = false;
+	struct mlx5_fc *fc;
+	int err = 0;
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	if (ucmd && ucmd->ncounters_data != 0) {
+		cntrs_data = ucmd->data;
+		/* counters already bound to at least one flow */
+		if (mcounters->cntrs_max_index) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		err = counters_set_description(ibcounters,
+					       MLX5_IB_COUNTERS_FLOW,
+					       u64_to_user_ptr(cntrs_data->counters_data),
+					       cntrs_data->ncounters);
+		if (err)
+			goto err;
+
+		desc_set = true;
+	} else if (!mcounters->cntrs_max_index) {
+		/* counters not bound yet, must have udata passed */
+		err = -EINVAL;
+		goto err;
+	}
+
+	if (!mcounters->hw_cntrs_hndl) {
+		fc = mlx5_fc_create(to_mdev(ibcounters->device)->mdev, false);
+		/*
+		 * mlx5_fc_create() reports failure with an ERR_PTR, not NULL.
+		 * Never store an error pointer as the handle: later users
+		 * only test it against NULL before dereferencing or
+		 * destroying it.
+		 */
+		if (IS_ERR_OR_NULL(fc)) {
+			err = fc ? PTR_ERR(fc) : -ENOMEM;
+			if (desc_set) {
+				/*
+				 * Undo the description installed above so the
+				 * counters do not look bound. Done in place:
+				 * mcntrs_mutex is already held here.
+				 */
+				kfree(mcounters->counters_data);
+				mcounters->counters_data = NULL;
+				mcounters->cntrs_max_index = 0;
+			}
+			goto err;
+		}
+		mcounters->hw_cntrs_hndl = fc;
+	}
+
+err:
+	mutex_unlock(&mcounters->mcntrs_mutex);
+
+	return err;
+}
+
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst,
- u32 underlay_qpn)
+ u32 underlay_qpn,
+ struct mlx5_ib_create_flow *ucmd)
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
- struct mlx5_flow_destination *rule_dst = dst;
+ struct mlx5_flow_destination dest_arr[2] = {};
+ struct mlx5_flow_destination *rule_dst = dest_arr;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
u32 prev_type = 0;
int err = 0;
- int dest_num = 1;
+ int dest_num = 0;
bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
if (!is_valid_attr(dev->mdev, flow_attr))
@@ -3152,6 +3276,10 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
INIT_LIST_HEAD(&handler->list);
+ if (dst) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
@@ -3188,15 +3316,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
goto free;
}
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = flow_counters_set_data(flow_act.counters, ucmd);
+ if (err)
+ goto free;
+
+ handler->ibcounters = flow_act.counters;
+ dest_arr[dest_num].type =
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest_arr[dest_num].counter =
+ (struct mlx5_fc *)(to_mcounters(flow_act.counters)->hw_cntrs_hndl);
+ dest_num++;
+ }
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- rule_dst = NULL;
- dest_num = 0;
+ if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+ rule_dst = NULL;
+ dest_num = 0;
+ }
} else {
if (is_egress)
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
else
flow_act.action |=
- dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
@@ -3233,7 +3376,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
}
static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
@@ -3373,12 +3516,43 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+ struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+ size_t min_ucmd_sz, required_ucmd_sz;
int err;
int underlay_qpn;
- if (udata &&
- udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ if (udata && udata->inlen) {
+ min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+ sizeof(ucmd_hdr.reserved);
+ if (udata->inlen < min_ucmd_sz)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+ if (err)
+ return ERR_PTR(err);
+
+ /* currently supports only one counters data */
+ if (ucmd_hdr.ncounters_data > 1)
+ return ERR_PTR(-EINVAL);
+
+ required_ucmd_sz = min_ucmd_sz +
+ sizeof(struct mlx5_ib_flow_counters_data) *
+ ucmd_hdr.ncounters_data;
+ if (udata->inlen > required_ucmd_sz &&
+ !ib_is_udata_cleared(udata, required_ucmd_sz,
+ udata->inlen - required_ucmd_sz))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+ if (!ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+ if (err) {
+ kfree(ucmd);
+ return ERR_PTR(err);
+ }
+ }
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
@@ -3433,7 +3607,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
mqp->underlay_qpn : 0;
handler = _create_flow_rule(dev, ft_prio, flow_attr,
- dst, underlay_qpn);
+ dst, underlay_qpn, ucmd);
}
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
@@ -3454,6 +3628,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
mutex_unlock(&dev->flow_db->lock);
kfree(dst);
+ kfree(ucmd);
return &handler->ibflow;
@@ -3464,6 +3639,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
unlock:
mutex_unlock(&dev->flow_db->lock);
kfree(dst);
+ kfree(ucmd);
kfree(handler);
return ERR_PTR(err);
}
@@ -5128,6 +5304,11 @@ static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ counters_clear_description(counters);
+ if (mcounters->hw_cntrs_hndl)
+ mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+ (struct mlx5_fc *)mcounters->hw_cntrs_hndl);
+
kfree(mcounters);
return 0;
@@ -5142,6 +5323,8 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
if (!mcounters)
return ERR_PTR(-ENOMEM);
+ mutex_init(&mcounters->mcntrs_mutex);
+
return &mcounters->ibcntrs;
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index fd27ec1aed08..7313d3cd04f0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -175,6 +175,7 @@ struct mlx5_ib_flow_handler {
struct ib_flow ibflow;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_handle *rule;
+ struct ib_counters *ibcounters;
};
struct mlx5_ib_flow_db {
@@ -813,8 +814,22 @@ struct mlx5_memic {
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
};
+enum mlx5_ib_counters_type { /* kind of object an mlx5 counters instance is described for */
+ MLX5_IB_COUNTERS_FLOW, /* the only type counters_set_description() accepts */
+};
+
struct mlx5_ib_mcounters {
struct ib_counters ibcntrs;
+ enum mlx5_ib_counters_type type;
+ void *hw_cntrs_hndl;
+ /* max index set as part of create_flow */
+ u32 cntrs_max_index;
+ /* number of counters data entries (<description,index> pair) */
+ u32 ncounters;
+ /* counters data array for descriptions and indexes */
+ u32 *counters_data;
+ /* protects access to mcounters internal data */
+ struct mutex mcntrs_mutex;
};
static inline struct mlx5_ib_mcounters *
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 93aab0f055b4..4612e0ad688b 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -160,6 +160,7 @@ struct mlx5_flow_act {
u32 modify_id;
uintptr_t esp_id;
struct mlx5_fs_vlan vlan;
+ struct ib_counters *counters;
};
#define MLX5_DECLARE_FLOW_ACT(name) \
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 508ea8c82da7..ef3f430a7050 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -443,4 +443,18 @@ enum {
enum {
MLX5_IB_CLOCK_INFO_V1 = 0,
};
+
+struct mlx5_ib_flow_counters_data { /* one counters description passed with create_flow */
+ __aligned_u64 counters_data; /* user pointer (converted with u64_to_user_ptr()) to ncounters <description, index> pairs of u32 */
+ __u32 ncounters; /* number of pairs; counters_set_description() rejects more than MAX_COUNTERS_NUM */
+ __u32 reserved;
+};
+
+struct mlx5_ib_create_flow { /* mlx5 driver data (udata) of the create_flow command */
+ __u32 ncounters_data; /* number of data[] entries; mlx5_ib_create_flow() rejects values above 1 */
+ __u32 reserved; /* last field of the minimal header mlx5_ib_create_flow() requires */
+ /* Following are counters data based on ncounters_data */
+ struct mlx5_ib_flow_counters_data data[];
+};
+
#endif /* MLX5_ABI_USER_H */
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 10/13] IB/mlx5: Add counters create and destroy support
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
This patch implements the device counters create and destroy APIs
and introduces some internal management structures.
Downstream patches in this series will add the functionality to
support flow counters binding and reading.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/main.c | 23 +++++++++++++++++++++++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 10 ++++++++++
2 files changed, 33 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 59f86198eb3b..18bfee86fa52 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5124,6 +5124,27 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev)
uverbs_free_spec_tree(dev->ib_dev.specs_root);
}
+/* Free the mlx5 counters object that embeds @counters; always returns 0. */
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+	kfree(to_mcounters(counters));
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed mlx5 counters object and hand back its embedded
+ * ib_counters, or ERR_PTR(-ENOMEM) when the allocation fails.
+ */
+static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
+						   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_mcounters *mcntrs = kzalloc(sizeof(*mcntrs), GFP_KERNEL);
+
+	if (mcntrs)
+		return &mcntrs->ibcntrs;
+
+	return ERR_PTR(-ENOMEM);
+}
+
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -5367,6 +5388,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
+ dev->ib_dev.create_counters = mlx5_ib_create_counters;
+ dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
err = init_node_data(dev);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 49a1aa0ff429..fd27ec1aed08 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -813,6 +813,16 @@ struct mlx5_memic {
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
};
+struct mlx5_ib_mcounters { /* mlx5 driver object behind a struct ib_counters */
+ struct ib_counters ibcntrs; /* embedded core object; to_mcounters() maps it back to this struct */
+};
+
+static inline struct mlx5_ib_mcounters *
+to_mcounters(struct ib_counters *ibcntrs)
+{ /* Return the mlx5_ib_mcounters that embeds @ibcntrs as its ibcntrs member. */
+ return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
+}
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 09/13] IB/uverbs: Add support for flow counters
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
The struct ib_uverbs_flow_spec_action_count associates
a counters object with the flow.
After this association, the flow counters can be read via
the counters object.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/core/uverbs.h | 1 +
drivers/infiniband/core/uverbs_cmd.c | 81 +++++++++++++++++++++++++++++++-----
include/uapi/rdma/ib_user_verbs.h | 13 ++++++
3 files changed, 84 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5b2461fa634d..c0d40fc3a53a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_action_tag flow_tag;
struct ib_uverbs_flow_spec_action_drop drop;
struct ib_uverbs_flow_spec_action_handle action;
+ struct ib_uverbs_flow_spec_action_count flow_count;
};
};
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ddb9d79691be..3179a95c6f5e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2748,43 +2748,82 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
struct ib_uflow_resources {
size_t max;
size_t num;
- struct ib_flow_action *collection[0];
+ size_t collection_num;
+ size_t counters_num;
+ struct ib_counters **counters;
+ struct ib_flow_action **collection;
};
static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
{
struct ib_uflow_resources *resources;
- resources =
- kmalloc(sizeof(*resources) +
- num_specs * sizeof(*resources->collection), GFP_KERNEL);
+ resources = kzalloc(sizeof(*resources), GFP_KERNEL);
if (!resources)
- return NULL;
+ goto err_res;
+
+ resources->counters =
+ kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
+
+ if (!resources->counters)
+ goto err_cnt;
+
+ resources->collection =
+ kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
+
+ if (!resources->collection)
+ goto err_collection;
- resources->num = 0;
resources->max = num_specs;
return resources;
+
+err_collection:
+ kfree(resources->counters);
+err_cnt:
+ kfree(resources);
+err_res:
+ return NULL;
}
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
{
unsigned int i;
- for (i = 0; i < uflow_res->num; i++)
+ for (i = 0; i < uflow_res->collection_num; i++)
atomic_dec(&uflow_res->collection[i]->usecnt);
+ for (i = 0; i < uflow_res->counters_num; i++)
+ atomic_dec(&uflow_res->counters[i]->usecnt);
+
+ kfree(uflow_res->collection);
+ kfree(uflow_res->counters);
kfree(uflow_res);
}
static void flow_resources_add(struct ib_uflow_resources *uflow_res,
- struct ib_flow_action *action)
+ enum ib_flow_spec_type type,
+ void *ibobj)
{
WARN_ON(uflow_res->num >= uflow_res->max);
- atomic_inc(&action->usecnt);
- uflow_res->collection[uflow_res->num++] = action;
+ switch (type) {
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
+ uflow_res->collection[uflow_res->collection_num++] =
+ (struct ib_flow_action *)ibobj;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
+ uflow_res->counters[uflow_res->counters_num++] =
+ (struct ib_counters *)ibobj;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ uflow_res->num++;
}
static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
@@ -2821,9 +2860,29 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
return -EINVAL;
ib_spec->action.size =
sizeof(struct ib_flow_spec_action_handle);
- flow_resources_add(uflow_res, ib_spec->action.act);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_HANDLE,
+ ib_spec->action.act);
uobj_put_obj_read(ib_spec->action.act);
break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (kern_spec->flow_count.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_count))
+ return -EINVAL;
+ ib_spec->flow_count.counters =
+ uobj_get_obj_read(counters,
+ UVERBS_OBJECT_COUNTERS,
+ kern_spec->flow_count.handle,
+ ucontext);
+ if (!ib_spec->flow_count.counters)
+ return -EINVAL;
+ ib_spec->flow_count.size =
+ sizeof(struct ib_flow_spec_action_count);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_COUNT,
+ ib_spec->flow_count.counters);
+ uobj_put_obj_read(ib_spec->flow_count.counters);
+ break;
default:
return -EINVAL;
}
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 409507f83b91..4f9991de8e3a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -998,6 +998,19 @@ struct ib_uverbs_flow_spec_action_handle {
__u32 reserved1;
};
+struct ib_uverbs_flow_spec_action_count { /* user-space flow spec that attaches a counters object to a flow */
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size; /* kern_spec_to_ib_spec_action() requires sizeof(struct ib_uverbs_flow_spec_action_count) */
+ __u16 reserved;
+ };
+ };
+ __u32 handle; /* counters handle, looked up with uobj_get_obj_read(counters, UVERBS_OBJECT_COUNTERS, ...) */
+ __u32 reserved1;
+};
+
struct ib_uverbs_flow_tunnel_filter {
__be32 tunnel_id;
};
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 07/13] IB/core: Support passing uhw for create_flow
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Matan Barak <matanb@mellanox.com>
This is required when user-space drivers need to pass extra information
regarding how to handle this flow steering specification.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/core/uverbs_cmd.c | 7 ++++++-
drivers/infiniband/core/verbs.c | 2 +-
drivers/infiniband/hw/mlx4/main.c | 6 +++++-
drivers/infiniband/hw/mlx5/main.c | 7 ++++++-
include/rdma/ib_verbs.h | 3 ++-
5 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e74262ee104c..ddb9d79691be 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3542,11 +3542,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
- flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+
+ flow_id = qp->device->create_flow(qp, flow_attr,
+ IB_FLOW_DOMAIN_USER, uhw);
+
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
goto err_free;
}
+ atomic_inc(&qp->usecnt);
+ flow_id->qp = qp;
flow_id->uobject = uobj;
uobj->object = flow_id;
uflow = container_of(uobj, typeof(*uflow), uobject);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 6ddfb1fade79..0b56828c1319 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1983,7 +1983,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
if (!qp->device->create_flow)
return ERR_PTR(-EOPNOTSUPP);
- flow_id = qp->device->create_flow(qp, flow_attr, domain);
+ flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
if (!IS_ERR(flow_id)) {
atomic_inc(&qp->usecnt);
flow_id->qp = qp;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bf12394c13c1..6fe5d5d1d1d9 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1848,7 +1848,7 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain)
+ int domain, struct ib_udata *udata)
{
int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
@@ -1866,6 +1866,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
(flow_attr->type != IB_FLOW_ATTR_NORMAL))
return ERR_PTR(-EOPNOTSUPP);
+ if (udata &&
+ udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
memset(type, 0, sizeof(type));
mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 25a271ef8374..59f86198eb3b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3363,7 +3363,8 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain)
+ int domain,
+ struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
@@ -3375,6 +3376,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int err;
int underlay_qpn;
+ if (udata &&
+ udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f6bd3b97b971..80956b1c9f4d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2459,7 +2459,8 @@ struct ib_device {
struct ib_flow * (*create_flow)(struct ib_qp *qp,
struct ib_flow_attr
*flow_attr,
- int domain);
+ int domain,
+ struct ib_udata *udata);
int (*destroy_flow)(struct ib_flow *flow_id);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 06/13] IB/uverbs: Add read counters support
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
This patch exposes the read counters verb to user space
applications.
With this verb the user can read the hardware counters that
are associated with the counters object.
The application needs to provide sufficient memory to
hold the statistics.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
.../infiniband/core/uverbs_std_types_counters.c | 59 +++++++++++++++++++++-
include/uapi/rdma/ib_user_ioctl_cmds.h | 7 +++
2 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index a5bc50ceee13..b35fcd3718c8 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -80,6 +80,49 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de
return ret;
}
+/*
+ * READ method of the counters object.
+ *
+ * Sizes a zeroed kernel buffer from the length of the user's output
+ * attribute (one u64 per counter), lets the driver fill it through
+ * ib_dev->read_counters() and copies the result back to user space.
+ *
+ * Returns -EOPNOTSUPP when the device has no read_counters callback,
+ * -EINVAL when the counters object has a zero usecnt, -ENOMEM on
+ * allocation failure, or the error of the attribute copy / driver read.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
+						       struct ib_uverbs_file *file,
+						       struct uverbs_attr_bundle *attrs)
+{
+	struct ib_counters_read_attr read_attr = {};
+	const struct uverbs_attr *buff_attr;
+	struct ib_counters *counters =
+		uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
+	int ret;
+
+	if (!ib_dev->read_counters)
+		return -EOPNOTSUPP;
+
+	/* nothing to read until the counters are in use by some object */
+	if (atomic_read(&counters->usecnt) == 0)
+		return -EINVAL;
+
+	ret = uverbs_copy_from(&read_attr.flags, attrs,
+			       UVERBS_ATTR_READ_COUNTERS_FLAGS);
+	if (ret)
+		return ret;
+
+	/* the user buffer length decides how many u64 slots are returned */
+	buff_attr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
+	read_attr.ncounters = buff_attr->ptr_attr.len / sizeof(u64);
+	read_attr.counters_buff = kcalloc(read_attr.ncounters, sizeof(u64),
+					  GFP_KERNEL);
+	if (!read_attr.counters_buff)
+		return -ENOMEM;
+
+	ret = ib_dev->read_counters(counters, &read_attr, attrs);
+	if (!ret)
+		ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
+				     read_attr.counters_buff,
+				     read_attr.ncounters * sizeof(u64));
+
+	kfree(read_attr.counters_buff);
+	return ret;
+}
+
static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
UVERBS_OBJECT_COUNTERS,
@@ -93,8 +136,22 @@ static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
UVERBS_ACCESS_DESTROY,
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+ UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
&UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY));
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index c28ce62d2e40..888ac5975a6c 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -140,9 +140,16 @@ enum uverbs_attrs_destroy_counters_cmd_attr_ids {
UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
};
+enum uverbs_attrs_read_counters_cmd_attr_ids { /* attribute IDs of UVERBS_METHOD_COUNTERS_READ */
+ UVERBS_ATTR_READ_COUNTERS_HANDLE, /* counters object to read */
+ UVERBS_ATTR_READ_COUNTERS_BUFF, /* output buffer receiving the u64 counter values */
+ UVERBS_ATTR_READ_COUNTERS_FLAGS, /* __u32 read flags */
+};
+
enum uverbs_methods_actions_counters_ops {
UVERBS_METHOD_COUNTERS_CREATE,
UVERBS_METHOD_COUNTERS_DESTROY,
+ UVERBS_METHOD_COUNTERS_READ,
};
#endif
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 05/13] IB/core: Introduce counters read verb
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
The user supplies counters instance and a reference to an output
array of uint64_t.
The driver reads the hardware counters values and writes them to
the output index location in the user supplied array.
All counters values are represented as uint64_t types.
To be able to successfully read the data the counters must be
first bound to an IB object.
Downstream patches will present binding method for
flow counters.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
include/rdma/ib_verbs.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ce3d39725966..f6bd3b97b971 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2219,6 +2219,17 @@ struct ib_counters {
atomic_t usecnt;
};
+enum ib_read_counters_flags { /* bit flags for ib_counters_read_attr.flags */
+ /* prefer read values from driver cache */
+ IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
+};
+
+struct ib_counters_read_attr { /* arguments handed to ib_device->read_counters() */
+ u64 *counters_buff; /* array that receives the counter values */
+ u32 ncounters; /* number of u64 entries in counters_buff */
+ u32 flags; /* use enum ib_read_counters_flags */
+};
+
struct uverbs_attr_bundle;
struct ib_device {
@@ -2493,6 +2504,9 @@ struct ib_device {
struct ib_counters * (*create_counters)(struct ib_device *device,
struct uverbs_attr_bundle *attrs);
int (*destroy_counters)(struct ib_counters *counters);
+ int (*read_counters)(struct ib_counters *counters,
+ struct ib_counters_read_attr *counters_read_attr,
+ struct uverbs_attr_bundle *attrs);
/**
* rdma netdev operation
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 04/13] IB/uverbs: Add create/destroy counters support
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
User space application which uses counters functionality,
is expected to allocate/release the counters resources by
calling create/destroy verbs and in turn get a unique handle
that can be used to attach the counters to its counted type.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/core/Makefile | 2 +-
drivers/infiniband/core/uverbs.h | 1 +
drivers/infiniband/core/uverbs_std_types.c | 3 +-
.../infiniband/core/uverbs_std_types_counters.c | 100 +++++++++++++++++++++
include/uapi/rdma/ib_user_ioctl_cmds.h | 14 +++
5 files changed, 118 insertions(+), 2 deletions(-)
create mode 100644 drivers/infiniband/core/uverbs_std_types_counters.c
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 8d42373a2d8a..61667705d746 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -37,4 +37,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_ioctl_merge.o uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
- uverbs_std_types_mr.o
+ uverbs_std_types_mr.o uverbs_std_types_counters.o
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index cfb51618ab7a..5b2461fa634d 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -287,6 +287,7 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 569f48bd821e..b570acbd94af 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -302,7 +302,8 @@ static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
&UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
&UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
&UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
- &UVERBS_OBJECT(UVERBS_OBJECT_DM));
+ &UVERBS_OBJECT(UVERBS_OBJECT_DM),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
new file mode 100644
index 000000000000..a5bc50ceee13
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+/*
+ * Release callback for a counters uobject.
+ *
+ * An explicit destroy (RDMA_REMOVE_DESTROY) is refused with -EBUSY while
+ * usecnt is non-zero. For any other removal reason, or when nothing is
+ * attached, the result of the driver's destroy_counters() is returned.
+ */
+static int uverbs_free_counters(struct ib_uobject *uobject,
+				enum rdma_remove_reason why)
+{
+	struct ib_counters *cntrs = uobject->object;
+	bool busy = why == RDMA_REMOVE_DESTROY &&
+		    atomic_read(&cntrs->usecnt);
+
+	if (busy)
+		return -EBUSY;
+
+	return cntrs->device->destroy_counters(cntrs);
+}
+
+/*
+ * Handler for UVERBS_METHOD_COUNTERS_CREATE.
+ *
+ * Asks the driver to create a counters object and links it with the
+ * uobject carried by UVERBS_ATTR_CREATE_COUNTERS_HANDLE. Returns 0 on
+ * success, -EOPNOTSUPP when the driver has no create_counters callback,
+ * or the error encoded in the pointer returned by create_counters().
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev,
+							  struct ib_uverbs_file *file,
+							  struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *handle;
+	struct ib_counters *cntrs;
+
+	/*
+	 * This check should be removed once the infrastructure
+	 * has the ability to remove methods from the parse tree
+	 * when such a condition is met.
+	 */
+	if (!ib_dev->create_counters)
+		return -EOPNOTSUPP;
+
+	handle = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+	cntrs = ib_dev->create_counters(ib_dev, attrs);
+	if (IS_ERR(cntrs))
+		return PTR_ERR(cntrs);
+
+	/* Nothing is attached to a freshly created counters object. */
+	atomic_set(&cntrs->usecnt, 0);
+	cntrs->device = ib_dev;
+	cntrs->uobject = handle;
+	handle->object = cntrs;
+
+	return 0;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, /* create method: takes one mandatory handle for the new counters object */
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, /* destroy method: no dedicated handler, uses uverbs_destroy_def_handler */
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, /* counters object definition: lists the methods exposed for it */
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), /* NOTE(review): uverbs_free_counters is presumably the cleanup callback for the type - confirm against UVERBS_TYPE_ALLOC_IDR */
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY));
+
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 83e3890eef20..c28ce62d2e40 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -55,6 +55,7 @@ enum uverbs_default_objects {
UVERBS_OBJECT_WQ,
UVERBS_OBJECT_FLOW_ACTION,
UVERBS_OBJECT_DM,
+ UVERBS_OBJECT_COUNTERS,
};
enum {
@@ -131,4 +132,17 @@ enum uverbs_methods_mr {
UVERBS_METHOD_DM_MR_REG,
};
+enum uverbs_attrs_create_counters_cmd_attr_ids {
+ UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+};
+
+enum uverbs_attrs_destroy_counters_cmd_attr_ids {
+ UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+};
+
+enum uverbs_methods_actions_counters_ops {
+ UVERBS_METHOD_COUNTERS_CREATE,
+ UVERBS_METHOD_COUNTERS_DESTROY,
+};
+
#endif
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 03/13] IB/core: Introduce counters object and its create/destroy
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
A verbs application may need to get statistics and info on various
aspects of a verbs object (e.g. Flow, QP, ...). In the general case the
application will state which object's counters it is interested in
(we refer to this action as attach), bind this new counters object
to the appropriate verbs object and at a later stage read their values
using the counters object.
This series introduces a general API for counters object that may
accumulate any ib object counters type, bound and read on demand.
Counters instance is allocated on an IB context and belongs to
that context.
Upon successful creation the counters can be bound to a verbs
object so that hardware counter instances can be created and read.
Downstream patches in this series will introduce the attach, bind
and the read functionality.
Counters instance can be de-allocated, upon successful
destruction the related hardware resources are released.
Prior to the destroy call the user must first make sure that the counters
object is not being used by any IB object, e.g. not attached to any of its
counted types; otherwise an EBUSY error is returned.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
include/rdma/ib_verbs.h | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e849bd0fc618..ce3d39725966 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2212,6 +2212,13 @@ struct ib_port_pkey_list {
struct list_head pkey_list;
};
+struct ib_counters { /* counters object produced by ib_device->create_counters() */
+ struct ib_device *device; /* device the counters object was created on */
+ struct ib_uobject *uobject; /* uverbs object wrapping this counters object */
+ /* num of objects attached */
+ atomic_t usecnt;
+};
+
struct uverbs_attr_bundle;
struct ib_device {
@@ -2483,6 +2490,10 @@ struct ib_device {
struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
+ struct ib_counters * (*create_counters)(struct ib_device *device,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_counters)(struct ib_counters *counters);
+
/**
* rdma netdev operation
*
--
2.14.3
^ permalink raw reply related
* [PATCH mlx5-next v1 02/13] net/mlx5: Export flow counter related API
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Raed Salem <raeds@mellanox.com>
Exports counters API to be used in both IB and EN.
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 23 ----------------------
.../net/ethernet/mellanox/mlx5/core/fs_counters.c | 3 +++
include/linux/mlx5/fs.h | 22 +++++++++++++++++++++
3 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index b6da322a8016..40992aed1791 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -131,29 +131,6 @@ struct mlx5_flow_table {
struct rhltable fgs_hash;
};
-struct mlx5_fc_cache {
- u64 packets;
- u64 bytes;
- u64 lastuse;
-};
-
-struct mlx5_fc {
- struct rb_node node;
- struct list_head list;
-
- /* last{packets,bytes} members are used when calculating the delta since
- * last reading
- */
- u64 lastpackets;
- u64 lastbytes;
-
- u32 id;
- bool deleted;
- bool aging;
-
- struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
-};
-
struct mlx5_ft_underlay_qp {
struct list_head list;
u32 qpn;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index b7ab929d5f8e..10f407843e03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -243,6 +243,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
return ERR_PTR(err);
}
+EXPORT_SYMBOL(mlx5_fc_create);
void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
{
@@ -260,6 +261,7 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
mlx5_cmd_fc_free(dev, counter->id);
kfree(counter);
}
+EXPORT_SYMBOL(mlx5_fc_destroy);
int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
@@ -317,6 +319,7 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, u16 id,
{
return mlx5_cmd_fc_query(dev, id, packets, bytes);
}
+EXPORT_SYMBOL(mlx5_fc_query);
void mlx5_fc_query_cached(struct mlx5_fc *counter,
u64 *bytes, u64 *packets, u64 *lastuse)
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 9f4d32e41c06..93aab0f055b4 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -186,6 +186,28 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
void mlx5_fc_query_cached(struct mlx5_fc *counter,
u64 *bytes, u64 *packets, u64 *lastuse);
+int mlx5_fc_query(struct mlx5_core_dev *dev, u16 id,
+ u64 *packets, u64 *bytes);
+
+struct mlx5_fc_cache { /* counter value snapshot; embedded in struct mlx5_fc as 'cache' */
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc { /* flow counter object returned by mlx5_fc_create() and released by mlx5_fc_destroy() */
+ struct rb_node node;
+ struct list_head list;
+
+ u64 lastpackets; /* last{packets,bytes} members are used when calculating the delta since last reading */
+ u64 lastbytes;
+
+ u32 id; /* id passed to mlx5_cmd_fc_free() when the counter is destroyed */
+ bool deleted;
+ bool aging; /* NOTE(review): presumably mirrors the 'aging' argument of mlx5_fc_create() - confirm */
+ struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 01/13] IB/uverbs: Add an ib_uobject getter to ioctl() infrastructure
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180527102346.15149-1-leon@kernel.org>
From: Matan Barak <matanb@mellanox.com>
Previously, the user had to dig inside the attribute to get the uobject.
Add a helper function that correctly extracts it (and does the required
checks) for him/her.
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/core/uverbs_std_types_cq.c | 23 +++++++++++-----------
.../infiniband/core/uverbs_std_types_flow_action.c | 4 ++--
include/rdma/uverbs_ioctl.h | 11 +++++++++++
3 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index b0dbae9dd0d7..3d293d01afea 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -65,7 +65,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
struct ib_cq_init_attr attr = {};
struct ib_cq *cq;
struct ib_uverbs_completion_event_file *ev_file = NULL;
- const struct uverbs_attr *ev_file_attr;
struct ib_uobject *ev_file_uobj;
if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
@@ -87,10 +86,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
UVERBS_ATTR_CREATE_CQ_FLAGS)))
return -EFAULT;
- ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
- if (!IS_ERR(ev_file_attr)) {
- ev_file_uobj = ev_file_attr->obj_attr.uobject;
-
+ ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_uobj)) {
ev_file = container_of(ev_file_uobj,
struct ib_uverbs_completion_event_file,
uobj_file.uobj);
@@ -102,8 +99,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
goto err_event_file;
}
- obj = container_of(uverbs_attr_get(attrs,
- UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject,
+ obj = container_of(uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_CQ_HANDLE),
typeof(*obj), uobject);
obj->uverbs_file = ucontext->ufile;
obj->comp_events_reported = 0;
@@ -170,13 +167,17 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj =
- uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject;
- struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
- uobject);
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
+ struct ib_uverbs_destroy_cq_resp resp;
+ struct ib_ucq_object *obj;
int ret;
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ obj = container_of(uobj, struct ib_ucq_object, uobject);
+
if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
return -EOPNOTSUPP;
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index b4f016dfa23d..a7be51cf2e42 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -320,7 +320,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
return ret;
/* No need to check as this attribute is marked as MANDATORY */
- uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -350,7 +350,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
if (ret)
return ret;
- uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
action = uobj->object;
if (action->type != IB_FLOW_ACTION_ESP)
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 4a4201d997a7..7ac6271a5ee0 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -420,6 +420,17 @@ static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_b
return uobj->object;
}
+/*
+ * Look up attribute @idx in @attrs_bundle and return the ib_uobject it
+ * carries. If uverbs_attr_get() yields an error pointer, that error is
+ * passed through to the caller via ERR_CAST().
+ */
+static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_attr_bundle *attrs_bundle,
+							 u16 idx)
+{
+	const struct uverbs_attr *found = uverbs_attr_get(attrs_bundle, idx);
+
+	return IS_ERR(found) ? ERR_CAST(found) : found->obj_attr.uobject;
+}
+
static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, const void *from, size_t size)
{
--
2.14.3
^ permalink raw reply related
* [PATCH rdma-next v1 00/13] Verbs flow counters support
From: Leon Romanovsky @ 2018-05-27 10:23 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
From: Leon Romanovsky <leonro@mellanox.com>
Changelog v0->v1:
* Decouple from DevX submission
* Use uverbs_attr_get_obj at counters read method
* Added define for max read buffer size (MAX_COUNTERS_BUFF_SIZE)
* Removed the struct mlx5_ib_flow_counter basic_flow_cnts and
the related structs used, used define instead
* Took Matan's patch from DevX
* uverbs_free_counters removed void* casting
* Added check to bound ncounters value (added define MAX_COUNTERS_NUM)
* Changed user supplied data buffer structure to be array of
struct <desc,index> pair (applied this change to user space also)
Not changed:
* UAPI files
* Addition of uhw to flow
Thanks
----------------------------------------------------------------------
>From Raed:
This series comes to allow user space applications to monitor real time
traffic activity and events of the verbs objects it manages, e.g.:
ibv_qp, ibv_wq, ibv_flow.
This API enables generic counters creation and defines their mapping
to (association with) a verbs object; the current mlx5 driver uses
this API for flow counters.
With this API, an application can monitor the entire life cycle of
object activity, defined here as a static counters attachment.
This API also allows dynamic counters monitoring of measurement points
for a partial period in the verbs object life cycle.
In addition it presents the implementation of the generic counters interface.
This will be achieved by extending flow creation by adding a new flow count
specification type which allows the user to associate a previously created
flow counters using the generic verbs counters interface to the created flow,
once associated the user could read statistics by using the read function of
the generic counters interface.
The API includes:
1. create and destroy API for new counters objects
2. read the counters values from HW
Note:
Attaching API to allow application to define the measurement points per objects
is a user space only API and this data is passed to kernel when the counted
object (e.g. flow) is created with the counters object.
Thanks
Matan Barak (2):
IB/uverbs: Add an ib_uobject getter to ioctl() infrastructure
IB/core: Support passing uhw for create_flow
Raed Salem (11):
net/mlx5: Export flow counter related API
IB/core: Introduce counters object and its create/destroy
IB/uverbs: Add create/destroy counters support
IB/core: Introduce counters read verb
IB/uverbs: Add read counters support
IB/core: Add support for flow counters
IB/uverbs: Add support for flow counters
IB/mlx5: Add counters create and destroy support
IB/mlx5: Add flow counters binding support
IB/mlx5: Add flow counters read support
IB/mlx5: Add counters read support
drivers/infiniband/core/Makefile | 2 +-
drivers/infiniband/core/uverbs.h | 2 +
drivers/infiniband/core/uverbs_cmd.c | 88 ++++++-
drivers/infiniband/core/uverbs_std_types.c | 3 +-
.../infiniband/core/uverbs_std_types_counters.c | 157 +++++++++++
drivers/infiniband/core/uverbs_std_types_cq.c | 23 +-
.../infiniband/core/uverbs_std_types_flow_action.c | 4 +-
drivers/infiniband/core/verbs.c | 2 +-
drivers/infiniband/hw/mlx4/main.c | 6 +-
drivers/infiniband/hw/mlx5/main.c | 291 ++++++++++++++++++++-
drivers/infiniband/hw/mlx5/mlx5_ib.h | 36 +++
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 23 --
.../net/ethernet/mellanox/mlx5/core/fs_counters.c | 3 +
include/linux/mlx5/fs.h | 23 ++
include/rdma/ib_verbs.h | 43 ++-
include/rdma/uverbs_ioctl.h | 11 +
include/uapi/rdma/ib_user_ioctl_cmds.h | 21 ++
include/uapi/rdma/ib_user_verbs.h | 13 +
include/uapi/rdma/mlx5-abi.h | 14 +
19 files changed, 701 insertions(+), 64 deletions(-)
create mode 100644 drivers/infiniband/core/uverbs_std_types_counters.c
^ permalink raw reply
* Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs
From: Michael Tuexen @ 2018-05-27 8:58 UTC (permalink / raw)
To: Dmitry Vyukov
Cc: Neil Horman, Xin Long, network dev, linux-sctp, David Miller,
David Ahern, Eric Dumazet, Marcelo Ricardo Leitner, syzkaller
In-Reply-To: <CACT4Y+YozRSfcoUoKHOWy5wujhVdks38vcfNGhwNj-REWcd-hw@mail.gmail.com>
> On 26. May 2018, at 17:50, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Sat, May 26, 2018 at 5:42 PM, Michael Tuexen
> <michael.tuexen@lurchi.franken.de> wrote:
>>> On 25. May 2018, at 21:13, Neil Horman <nhorman@tuxdriver.com> wrote:
>>>
>>> On Sat, May 26, 2018 at 01:41:02AM +0800, Xin Long wrote:
>>>> syzbot reported a rcu_sched self-detected stall on CPU which is caused
>>>> by too small value set on rto_min with SCTP_RTOINFO sockopt. With this
>>>> value, hb_timer will get stuck there, as in its timer handler it starts
>>>> this timer again with this value, then goes to the timer handler again.
>>>>
>>>> This problem is there since very beginning, and thanks to Eric for the
>>>> reproducer shared from a syzbot mail.
>>>>
>>>> This patch fixes it by not allowing to set rto_min with a value below
>>>> 200 msecs, which is based on TCP's, by either setsockopt or sysctl.
>>>>
>>>> Reported-by: syzbot+3dcd59a1f907245f891f@syzkaller.appspotmail.com
>>>> Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>>> ---
>>>> include/net/sctp/constants.h | 1 +
>>>> net/sctp/socket.c | 10 +++++++---
>>>> net/sctp/sysctl.c | 3 ++-
>>>> 3 files changed, 10 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
>>>> index 20ff237..2ee7a7b 100644
>>>> --- a/include/net/sctp/constants.h
>>>> +++ b/include/net/sctp/constants.h
>>>> @@ -277,6 +277,7 @@ enum { SCTP_MAX_GABS = 16 };
>>>> #define SCTP_RTO_INITIAL (3 * 1000)
>>>> #define SCTP_RTO_MIN (1 * 1000)
>>>> #define SCTP_RTO_MAX (60 * 1000)
>>>> +#define SCTP_RTO_HARD_MIN 200
>>>>
>>>> #define SCTP_RTO_ALPHA 3 /* 1/8 when converted to right shifts. */
>>>> #define SCTP_RTO_BETA 2 /* 1/4 when converted to right shifts. */
>>>> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
>>>> index ae7e7c6..6ef12c7 100644
>>>> --- a/net/sctp/socket.c
>>>> +++ b/net/sctp/socket.c
>>>> @@ -3029,7 +3029,8 @@ static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
>>>> * be changed.
>>>> *
>>>> */
>>>> -static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigned int optlen)
>>>> +static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval,
>>>> + unsigned int optlen)
>>>> {
>>>> struct sctp_rtoinfo rtoinfo;
>>>> struct sctp_association *asoc;
>>>> @@ -3056,10 +3057,13 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne
>>>> else
>>>> rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max;
>>>>
>>>> - if (rto_min)
>>>> + if (rto_min) {
>>>> + if (rto_min < SCTP_RTO_HARD_MIN)
>>>> + return -EINVAL;
>>>> rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min;
>>>> - else
>>>> + } else {
>>>> rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min;
>>>> + }
>>>>
>>>> if (rto_min > rto_max)
>>>> return -EINVAL;
>>>> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
>>>> index 33ca5b7..7ec854a 100644
>>>> --- a/net/sctp/sysctl.c
>>>> +++ b/net/sctp/sysctl.c
>>>> @@ -52,6 +52,7 @@ static int rto_alpha_min = 0;
>>>> static int rto_beta_min = 0;
>>>> static int rto_alpha_max = 1000;
>>>> static int rto_beta_max = 1000;
>>>> +static int rto_hard_min = SCTP_RTO_HARD_MIN;
>>>>
>>>> static unsigned long max_autoclose_min = 0;
>>>> static unsigned long max_autoclose_max =
>>>> @@ -116,7 +117,7 @@ static struct ctl_table sctp_net_table[] = {
>>>> .maxlen = sizeof(unsigned int),
>>>> .mode = 0644,
>>>> .proc_handler = proc_sctp_do_rto_min,
>>>> - .extra1 = &one,
>>>> + .extra1 = &rto_hard_min,
>>>> .extra2 = &init_net.sctp.rto_max
>>>> },
>>>> {
>>>> --
>>>> 2.1.0
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>
>>> Patch looks fine, you probably want to note this hard minimum in man(7) sctp as
>>> well
>>>
>> I'm aware of some signalling networks which use RTO.min of smaller values than 200ms.
>> So could this be reduced?
>
> Hi Michael,
>
> What value do they use?
I have seen values of
RTO.Min = 50ms
RTO.Max = 200ms
RTO.Initial = 100ms
Best regards
Michael
>
> Xin, Neil, is there more principled way of ensuring that a timer won't
> cause a hard CPU stall? There are slow machines and there are slow
> kernels (in particular syzbot kernel has tons of debug configs
> enabled). 200ms _should_ not cause problems because we did not see
> them with tcp. But it's hard to say what's the low limit as we are
> trying to put a hard upper bound on execution time of a complex
> section of code. Is there something like cond_resched for timers?
^ permalink raw reply
* [PATCH net-next 3/3] mlxsw: pci: Utilize MRSR register to perform FW reset
From: Ido Schimmel @ 2018-05-27 6:56 UTC (permalink / raw)
To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel
In-Reply-To: <20180527065615.1329-1-idosch@mellanox.com>
From: Jiri Pirko <jiri@mellanox.com>
So far, the PCI BAR0 register is used for triggering FW reset. However,
that is a legacy approach and it is recommended to use MRSR to perform
reset instead. So do that. Move the reset into init() function as
the cmd interface needs to be used. With that, IRQ initialization needs
to be moved as well. As a side effect, the reset move simplifies
the devlink reload flow.
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.c | 4 +-
drivers/net/ethernet/mellanox/mlxsw/core.h | 2 +-
drivers/net/ethernet/mellanox/mlxsw/pci.c | 130 +++++++++++++----------------
3 files changed, 62 insertions(+), 74 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 1d9ecf89854e..8a766fe28fa0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -966,14 +966,12 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink)
{
struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
- const struct mlxsw_bus *mlxsw_bus = mlxsw_core->bus;
int err;
- if (!mlxsw_bus->reset)
+ if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET))
return -EOPNOTSUPP;
mlxsw_core_bus_device_unregister(mlxsw_core, true);
- mlxsw_bus->reset(mlxsw_core->bus_priv);
err = mlxsw_core_bus_device_register(mlxsw_core->bus_info,
mlxsw_core->bus,
mlxsw_core->bus_priv, true,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 4eac7fbd07d5..4a8d4c7f89d9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -337,6 +337,7 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id)
#define MLXSW_BUS_F_TXRX BIT(0)
+#define MLXSW_BUS_F_RESET BIT(1)
struct mlxsw_bus {
const char *kind;
@@ -344,7 +345,6 @@ struct mlxsw_bus {
const struct mlxsw_config_profile *profile,
struct mlxsw_res *res);
void (*fini)(void *bus_priv);
- void (*reset)(void *bus_priv);
bool (*skb_transmit_busy)(void *bus_priv,
const struct mlxsw_tx_info *tx_info);
int (*skb_transmit)(void *bus_priv, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index db794a1a3a7e..fc4557245ff4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1371,6 +1371,51 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci,
mbox->mapaddr);
}
+static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
+ const struct pci_device_id *id)
+{
+ unsigned long end;
+ char mrsr_pl[MLXSW_REG_MRSR_LEN];
+ int err;
+
+ mlxsw_reg_mrsr_pack(mrsr_pl);
+ err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+ if (err)
+ return err;
+ if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) {
+ msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
+ return 0;
+ }
+
+ /* We must wait for the HW to become responsive once again. */
+ msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
+
+ end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
+ do {
+ u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
+
+ if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC)
+ break;
+ cond_resched();
+ } while (time_before(jiffies, end));
+ return 0;
+}
+
+static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci)
+{
+ int err;
+
+ err = pci_alloc_irq_vectors(mlxsw_pci->pdev, 1, 1, PCI_IRQ_MSIX);
+ if (err < 0)
+ dev_err(&mlxsw_pci->pdev->dev, "MSI-X init failed\n");
+ return err;
+}
+
+static void mlxsw_pci_free_irq_vectors(struct mlxsw_pci *mlxsw_pci)
+{
+ pci_free_irq_vectors(mlxsw_pci->pdev);
+}
+
static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
const struct mlxsw_config_profile *profile,
struct mlxsw_res *res)
@@ -1398,6 +1443,16 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (err)
goto err_out_mbox_alloc;
+ err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id);
+ if (err)
+ goto err_sw_reset;
+
+ err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
+ if (err < 0) {
+ dev_err(&pdev->dev, "MSI-X init failed\n");
+ goto err_alloc_irq;
+ }
+
err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
if (err)
goto err_query_fw;
@@ -1481,6 +1536,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
err_doorbell_page_bar:
err_iface_rev:
err_query_fw:
+ mlxsw_pci_free_irq_vectors(mlxsw_pci);
+err_alloc_irq:
+err_sw_reset:
mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox);
err_out_mbox_alloc:
mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
@@ -1496,6 +1554,7 @@ static void mlxsw_pci_fini(void *bus_priv)
free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci);
mlxsw_pci_aqs_fini(mlxsw_pci);
mlxsw_pci_fw_area_fini(mlxsw_pci);
+ mlxsw_pci_free_irq_vectors(mlxsw_pci);
mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox);
mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
}
@@ -1677,58 +1736,6 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
return err;
}
-static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
- const struct pci_device_id *id)
-{
- unsigned long end;
-
- mlxsw_pci_write32(mlxsw_pci, SW_RESET, MLXSW_PCI_SW_RESET_RST_BIT);
- if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) {
- msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
- return 0;
- }
-
- /* Reset needs to be written before we read control register, and
- * we must wait for the HW to become responsive once again
- */
- wmb();
- msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
-
- end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
- do {
- u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
-
- if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC)
- break;
- cond_resched();
- } while (time_before(jiffies, end));
- return 0;
-}
-
-static void mlxsw_pci_free_irq_vectors(struct mlxsw_pci *mlxsw_pci)
-{
- pci_free_irq_vectors(mlxsw_pci->pdev);
-}
-
-static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci)
-{
- int err;
-
- err = pci_alloc_irq_vectors(mlxsw_pci->pdev, 1, 1, PCI_IRQ_MSIX);
- if (err < 0)
- dev_err(&mlxsw_pci->pdev->dev, "MSI-X init failed\n");
- return err;
-}
-
-static void mlxsw_pci_reset(void *bus_priv)
-{
- struct mlxsw_pci *mlxsw_pci = bus_priv;
-
- mlxsw_pci_free_irq_vectors(mlxsw_pci);
- mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id);
- mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
-}
-
static const struct mlxsw_bus mlxsw_pci_bus = {
.kind = "pci",
.init = mlxsw_pci_init,
@@ -1736,8 +1743,7 @@ static const struct mlxsw_bus mlxsw_pci_bus = {
.skb_transmit_busy = mlxsw_pci_skb_transmit_busy,
.skb_transmit = mlxsw_pci_skb_transmit,
.cmd_exec = mlxsw_pci_cmd_exec,
- .features = MLXSW_BUS_F_TXRX,
- .reset = mlxsw_pci_reset,
+ .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET,
};
static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -1795,18 +1801,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
mlxsw_pci->pdev = pdev;
pci_set_drvdata(pdev, mlxsw_pci);
- err = mlxsw_pci_sw_reset(mlxsw_pci, id);
- if (err) {
- dev_err(&pdev->dev, "Software reset failed\n");
- goto err_sw_reset;
- }
-
- err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
- if (err < 0) {
- dev_err(&pdev->dev, "MSI-X init failed\n");
- goto err_msix_init;
- }
-
mlxsw_pci->bus_info.device_kind = driver_name;
mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
mlxsw_pci->bus_info.dev = &pdev->dev;
@@ -1823,9 +1817,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
err_bus_device_register:
- mlxsw_pci_free_irq_vectors(mlxsw_pci);
-err_msix_init:
-err_sw_reset:
iounmap(mlxsw_pci->hw_addr);
err_ioremap:
err_pci_resource_len_check:
@@ -1843,7 +1834,6 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
mlxsw_core_bus_device_unregister(mlxsw_pci->core, false);
- mlxsw_pci_free_irq_vectors(mlxsw_pci);
iounmap(mlxsw_pci->hw_addr);
pci_release_regions(mlxsw_pci->pdev);
pci_disable_device(mlxsw_pci->pdev);
--
2.14.3
^ permalink raw reply related
* [PATCH net-next 2/3] mlxsw: cmd: Handle error after reset gracefully
From: Ido Schimmel @ 2018-05-27 6:56 UTC (permalink / raw)
To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel
In-Reply-To: <20180527065615.1329-1-idosch@mellanox.com>
From: Jiri Pirko <jiri@mellanox.com>
There is an exception in command interface processing in case the MRSR
register is written to. The register triggers FW reset and during the
reset FW returns an error. So handle this by ignoring this error while
writing to MRSR register.
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/cmd.h | 16 +++++++++++-----
drivers/net/ethernet/mellanox/mlxsw/core.c | 26 +++++++++++++++++++-------
2 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 8da91b023b13..2bc48054b685 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -58,7 +58,7 @@ static inline void mlxsw_cmd_mbox_zero(char *mbox)
struct mlxsw_core;
int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
- u32 in_mod, bool out_mbox_direct,
+ u32 in_mod, bool out_mbox_direct, bool reset_ok,
char *in_mbox, size_t in_mbox_size,
char *out_mbox, size_t out_mbox_size);
@@ -67,7 +67,7 @@ static inline int mlxsw_cmd_exec_in(struct mlxsw_core *mlxsw_core, u16 opcode,
size_t in_mbox_size)
{
return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false,
- in_mbox, in_mbox_size, NULL, 0);
+ false, in_mbox, in_mbox_size, NULL, 0);
}
static inline int mlxsw_cmd_exec_out(struct mlxsw_core *mlxsw_core, u16 opcode,
@@ -76,7 +76,7 @@ static inline int mlxsw_cmd_exec_out(struct mlxsw_core *mlxsw_core, u16 opcode,
char *out_mbox, size_t out_mbox_size)
{
return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod,
- out_mbox_direct, NULL, 0,
+ out_mbox_direct, false, NULL, 0,
out_mbox, out_mbox_size);
}
@@ -84,7 +84,7 @@ static inline int mlxsw_cmd_exec_none(struct mlxsw_core *mlxsw_core, u16 opcode,
u8 opcode_mod, u32 in_mod)
{
return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false,
- NULL, 0, NULL, 0);
+ false, NULL, 0, NULL, 0);
}
enum mlxsw_cmd_opcode {
@@ -179,6 +179,8 @@ enum mlxsw_cmd_status {
MLXSW_CMD_STATUS_BAD_INDEX = 0x0A,
/* NVMEM checksum/CRC failed. */
MLXSW_CMD_STATUS_BAD_NVMEM = 0x0B,
+ /* Device is currently running reset */
+ MLXSW_CMD_STATUS_RUNNING_RESET = 0x26,
/* Bad management packet (silently discarded). */
MLXSW_CMD_STATUS_BAD_PKT = 0x30,
};
@@ -208,6 +210,8 @@ static inline const char *mlxsw_cmd_status_str(u8 status)
return "BAD_INDEX";
case MLXSW_CMD_STATUS_BAD_NVMEM:
return "BAD_NVMEM";
+ case MLXSW_CMD_STATUS_RUNNING_RESET:
+ return "RUNNING_RESET";
case MLXSW_CMD_STATUS_BAD_PKT:
return "BAD_PKT";
default:
@@ -869,10 +873,12 @@ MLXSW_ITEM32(cmd_mbox, config_profile, cqe_version, 0xB0, 0, 8);
*/
static inline int mlxsw_cmd_access_reg(struct mlxsw_core *mlxsw_core,
+ bool reset_ok,
char *in_mbox, char *out_mbox)
{
return mlxsw_cmd_exec(mlxsw_core, MLXSW_CMD_OPCODE_ACCESS_REG,
- 0, 0, false, in_mbox, MLXSW_CMD_MBOX_SIZE,
+ 0, 0, false, reset_ok,
+ in_mbox, MLXSW_CMD_MBOX_SIZE,
out_mbox, MLXSW_CMD_MBOX_SIZE);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index a38faec45b30..1d9ecf89854e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1480,6 +1480,7 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
{
enum mlxsw_emad_op_tlv_status status;
int err, n_retry;
+ bool reset_ok;
char *in_mbox, *out_mbox, *tmp;
dev_dbg(mlxsw_core->bus_info->dev, "Reg cmd access (reg_id=%x(%s),type=%s)\n",
@@ -1501,9 +1502,16 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32);
mlxsw_emad_pack_reg_tlv(tmp, reg, payload);
+ /* There is a special treatment needed for MRSR (reset) register.
+ * The command interface will return error after the command
+ * is executed, so tell the lower layer to expect it
+ * and cope accordingly.
+ */
+ reset_ok = reg->id == MLXSW_REG_MRSR_ID;
+
n_retry = 0;
retry:
- err = mlxsw_cmd_access_reg(mlxsw_core, in_mbox, out_mbox);
+ err = mlxsw_cmd_access_reg(mlxsw_core, reset_ok, in_mbox, out_mbox);
if (!err) {
err = mlxsw_emad_process_status(out_mbox, &status);
if (err) {
@@ -1793,7 +1801,7 @@ static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
}
int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
- u32 in_mod, bool out_mbox_direct,
+ u32 in_mod, bool out_mbox_direct, bool reset_ok,
char *in_mbox, size_t in_mbox_size,
char *out_mbox, size_t out_mbox_size)
{
@@ -1816,7 +1824,15 @@ int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
in_mbox, in_mbox_size,
out_mbox, out_mbox_size, &status);
- if (err == -EIO && status != MLXSW_CMD_STATUS_OK) {
+ if (!err && out_mbox) {
+ dev_dbg(mlxsw_core->bus_info->dev, "Output mailbox:\n");
+ mlxsw_core_buf_dump_dbg(mlxsw_core, out_mbox, out_mbox_size);
+ }
+
+ if (reset_ok && err == -EIO &&
+ status == MLXSW_CMD_STATUS_RUNNING_RESET) {
+ err = 0;
+ } else if (err == -EIO && status != MLXSW_CMD_STATUS_OK) {
dev_err(mlxsw_core->bus_info->dev, "Cmd exec failed (opcode=%x(%s),opcode_mod=%x,in_mod=%x,status=%x(%s))\n",
opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod,
in_mod, status, mlxsw_cmd_status_str(status));
@@ -1826,10 +1842,6 @@ int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
in_mod);
}
- if (!err && out_mbox) {
- dev_dbg(mlxsw_core->bus_info->dev, "Output mailbox:\n");
- mlxsw_core_buf_dump_dbg(mlxsw_core, out_mbox, out_mbox_size);
- }
return err;
}
EXPORT_SYMBOL(mlxsw_cmd_exec);
--
2.14.3
^ permalink raw reply related
* [PATCH net-next 0/3] mlxsw: use MRSR register for FW reset
From: Ido Schimmel @ 2018-05-27 6:56 UTC (permalink / raw)
To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel
Jiri says:
Introduce a MRSR register definition and use it to do FW reset instead
of existing mechanism using PCI BAR0 register.
Jiri Pirko (3):
mlxsw: reg: Add Management Reset and Shutdown Register
mlxsw: cmd: Handle error after reset gracefully
mlxsw: pci: Utilize MRSR register to perform FW reset
drivers/net/ethernet/mellanox/mlxsw/cmd.h | 16 ++--
drivers/net/ethernet/mellanox/mlxsw/core.c | 30 ++++---
drivers/net/ethernet/mellanox/mlxsw/core.h | 2 +-
drivers/net/ethernet/mellanox/mlxsw/pci.c | 130 +++++++++++++----------------
drivers/net/ethernet/mellanox/mlxsw/reg.h | 25 ++++++
5 files changed, 117 insertions(+), 86 deletions(-)
--
2.14.3
^ permalink raw reply
* [PATCH net-next 1/3] mlxsw: reg: Add Management Reset and Shutdown Register
From: Ido Schimmel @ 2018-05-27 6:56 UTC (permalink / raw)
To: netdev; +Cc: davem, jiri, mlxsw, Ido Schimmel
In-Reply-To: <20180527065615.1329-1-idosch@mellanox.com>
From: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/reg.h | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 3f4d7e22cece..1877d9f8a11a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -7034,6 +7034,30 @@ static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port,
mlxsw_reg_mpar_pa_id_set(payload, pa_id);
}
+/* MRSR - Management Reset and Shutdown Register
+ * ---------------------------------------------
+ * MRSR register is used to reset or shutdown the switch or
+ * the entire system (when applicable).
+ */
+#define MLXSW_REG_MRSR_ID 0x9023
+#define MLXSW_REG_MRSR_LEN 0x08
+
+MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
+
+/* reg_mrsr_command
+ * Reset/shutdown command
+ * 0 - do nothing
+ * 1 - software reset
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4);
+
+static inline void mlxsw_reg_mrsr_pack(char *payload)
+{
+ MLXSW_REG_ZERO(mrsr, payload);
+ mlxsw_reg_mrsr_command_set(payload, 1);
+}
+
/* MLCR - Management LED Control Register
* --------------------------------------
* Controls the system LEDs.
@@ -7898,6 +7922,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(mcia),
MLXSW_REG(mpat),
MLXSW_REG(mpar),
+ MLXSW_REG(mrsr),
MLXSW_REG(mlcr),
MLXSW_REG(mpsc),
MLXSW_REG(mcqi),
--
2.14.3
^ permalink raw reply related
* [PATCH net] mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG
From: Ido Schimmel @ 2018-05-27 6:48 UTC (permalink / raw)
To: netdev; +Cc: davem, jiri, petrm, mlxsw, Ido Schimmel
From: Petr Machata <petrm@mellanox.com>
VLAN 1 is internally used for untagged traffic. Prevent creation of
explicit netdevice for that VLAN, because that currently isn't supported
and leads to the NULL pointer dereference cited below.
Fix by preventing creation of VLAN devices with VID of 1 over mlxsw
devices or LAG devices that involve mlxsw devices.
[ 327.175816] ================================================================================
[ 327.184544] UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c:200:12
[ 327.193667] member access within null pointer of type 'const struct mlxsw_sp_fid'
[ 327.201226] CPU: 0 PID: 8983 Comm: ip Not tainted 4.17.0-rc4-petrm_net_ip6gre_headroom-custom-140 #11
[ 327.210496] Hardware name: Mellanox Technologies Ltd. "MSN2410-CB2F"/"SA000874", BIOS 4.6.5 03/08/2016
[ 327.219872] Call Trace:
[ 327.222384] dump_stack+0xc3/0x12b
[ 327.234007] ubsan_epilogue+0x9/0x49
[ 327.237638] ubsan_type_mismatch_common+0x1f9/0x2d0
[ 327.255769] __ubsan_handle_type_mismatch+0x90/0xa7
[ 327.264716] mlxsw_sp_fid_type+0x35/0x50 [mlxsw_spectrum]
[ 327.270255] mlxsw_sp_port_vlan_router_leave+0x46/0xc0 [mlxsw_spectrum]
[ 327.277019] mlxsw_sp_inetaddr_port_vlan_event+0xe1/0x340 [mlxsw_spectrum]
[ 327.315031] mlxsw_sp_netdevice_vrf_event+0xa8/0x100 [mlxsw_spectrum]
[ 327.321626] mlxsw_sp_netdevice_event+0x276/0x430 [mlxsw_spectrum]
[ 327.367863] notifier_call_chain+0x4c/0x150
[ 327.372128] __netdev_upper_dev_link+0x1b3/0x260
[ 327.399450] vrf_add_slave+0xce/0x170 [vrf]
[ 327.403703] do_setlink+0x658/0x1d70
[ 327.508998] rtnl_newlink+0x908/0xf20
[ 327.559128] rtnetlink_rcv_msg+0x50c/0x720
[ 327.571720] netlink_rcv_skb+0x16a/0x1f0
[ 327.583450] netlink_unicast+0x2ca/0x3e0
[ 327.599305] netlink_sendmsg+0x3e2/0x7f0
[ 327.616655] sock_sendmsg+0x76/0xc0
[ 327.620207] ___sys_sendmsg+0x494/0x5d0
[ 327.666117] __sys_sendmsg+0xc2/0x130
[ 327.690953] do_syscall_64+0x66/0x370
[ 327.694677] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 327.699782] RIP: 0033:0x7f4c2f3f8037
[ 327.703393] RSP: 002b:00007ffe8c389708 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 327.711035] RAX: ffffffffffffffda RBX: 000000005b03f53e RCX: 00007f4c2f3f8037
[ 327.718229] RDX: 0000000000000000 RSI: 00007ffe8c389760 RDI: 0000000000000003
[ 327.725431] RBP: 00007ffe8c389760 R08: 0000000000000000 R09: 00007f4c2f443630
[ 327.732632] R10: 00000000000005eb R11: 0000000000000246 R12: 0000000000000000
[ 327.739833] R13: 00000000006774e0 R14: 00007ffe8c3897e8 R15: 0000000000000000
[ 327.747096] ================================================================================
Fixes: 9589a7b5d7d9 ("mlxsw: spectrum: Handle VLAN devices linking / unlinking")
Suggested-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ca38a30fbe91..adc6ab2cf429 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4433,6 +4433,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
return -EINVAL;
}
+ if (is_vlan_dev(upper_dev) &&
+ vlan_dev_vlan_id(upper_dev) == 1) {
+ NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic");
+ return -EINVAL;
+ }
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
--
2.14.3
^ permalink raw reply related
* Re: net-next boot error: KASAN: use-after-free Write in call_usermodehelper_exec_work
From: Dmitry Vyukov @ 2018-05-27 5:40 UTC (permalink / raw)
To: syzbot, netdev, David Miller; +Cc: LKML, mcgrof, syzkaller-bugs
In-Reply-To: <000000000000424989056d295959@google.com>
On Sun, May 27, 2018 at 7:34 AM, syzbot
<syzbot+9269ae80345087b898d0@syzkaller.appspotmail.com> wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit: 5b79c2af667c Merge git://git.kernel.org/pub/scm/linux/kern..
> git tree: net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=16087fa7800000
> kernel config: https://syzkaller.appspot.com/x/.config?x=e4078980b886800c
> dashboard link: https://syzkaller.appspot.com/bug?extid=9269ae80345087b898d0
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+9269ae80345087b898d0@syzkaller.appspotmail.com
This first happened just now on net-next, so +net maintainers.
This happened during boot, so no separate reproducer.
> FS-Cache: Loaded
> CacheFiles: Loaded
> pnp: PnP ACPI init
> pnp: PnP ACPI: found 7 devices
> ==================================================================
> BUG: KASAN: use-after-free in call_usermodehelper_exec_work+0x2d3/0x310
> kernel/umh.c:195
> Write of size 4 at addr ffff8801d63bd370 by task kworker/u4:0/6
>
> CPU: 0 PID: 6 Comm: kworker/u4:0 Not tainted 4.17.0-rc6+ #65
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Workqueue: events_unbound call_usermodehelper_exec_work
> Call Trace:
> __dump_stack lib/dump_stack.c:77 [inline]
> dump_stack+0x1b9/0x294 lib/dump_stack.c:113
> print_address_description+0x6c/0x20b mm/kasan/report.c:256
> kasan_report_error mm/kasan/report.c:354 [inline]
> kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
> __asan_report_store4_noabort+0x17/0x20 mm/kasan/report.c:437
> call_usermodehelper_exec_work+0x2d3/0x310 kernel/umh.c:195
> process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
> worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
> kthread+0x345/0x410 kernel/kthread.c:240
> ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> Allocated by task 1:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:448
> set_track mm/kasan/kasan.c:460 [inline]
> kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
> kmem_cache_alloc_trace+0x152/0x780 mm/slab.c:3620
> kmalloc include/linux/slab.h:512 [inline]
> kzalloc include/linux/slab.h:701 [inline]
> call_usermodehelper_setup+0xe8/0x400 kernel/umh.c:382
> clocksource: acpi_pm: mask: 0xffffff max_cycles: 0xffffff, max_idle_ns:
> 2085701024 ns
> kobject_uevent_env+0xb21/0x1110 lib/kobject_uevent.c:608
> kobject_uevent+0x1f/0x30 lib/kobject_uevent.c:636
> device_add+0xb01/0x16d0 drivers/base/core.c:1843
> device_create_groups_vargs+0x1ff/0x270 drivers/base/core.c:2439
> device_create_vargs drivers/base/core.c:2479 [inline]
> device_create+0xd3/0x100 drivers/base/core.c:2515
> chr_dev_init+0x120/0x158 drivers/char/mem.c:938
> do_one_initcall+0x127/0x913 init/main.c:884
> do_initcall_level init/main.c:952 [inline]
> do_initcalls init/main.c:960 [inline]
> do_basic_setup init/main.c:978 [inline]
> kernel_init_freeable+0x49b/0x58e init/main.c:1135
> kernel_init+0x11/0x1b3 init/main.c:1061
> ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> Freed by task 1296:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:448
> NET: Registered protocol family 2
> set_track mm/kasan/kasan.c:460 [inline]
> __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
> kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
> __cache_free mm/slab.c:3498 [inline]
> kfree+0xd9/0x260 mm/slab.c:3813
> call_usermodehelper_freeinfo kernel/umh.c:45 [inline]
> umh_complete+0x7b/0x90 kernel/umh.c:59
> call_usermodehelper_exec_async+0x6e8/0x9e0 kernel/umh.c:116
> tcp_listen_portaddr_hash hash table entries: 4096 (order: 6, 294912 bytes)
> ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> The buggy address belongs to the object at ffff8801d63bd300
> which belongs to the cache kmalloc-192 of size 192
> The buggy address is located 112 bytes inside of
> 192-byte region [ffff8801d63bd300, ffff8801d63bd3c0)
> The buggy address belongs to the page:
> TCP established hash table entries: 65536 (order: 7, 524288 bytes)
> page:ffffea000758ef40 count:1 mapcount:0 mapping:ffff8801d63bd000 index:0x0
> flags: 0x2fffc0000000100(slab)
> raw: 02fffc0000000100 ffff8801d63bd000 0000000000000000 0000000100000010
> TCP bind hash table entries: 65536 (order: 10, 4194304 bytes)
> raw: ffffea000759c2e0 ffffea0007521be0 ffff8801da800040 0000000000000000
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
> ffff8801d63bd200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ffff8801d63bd280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
>>
>> ffff8801d63bd300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>
> TCP: Hash tables configured (established 65536 bind 65536)
> ^
> ffff8801d63bd380: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
> ffff8801d63bd400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ==================================================================
> UDP hash table entries: 4096 (order: 7, 655360 bytes)
> UDP-Lite hash table entries: 4096 (order: 7, 655360 bytes)
>
>
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with
> syzbot.
>
> --
> You received this message because you are subscribed to the Google Groups
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to syzkaller-bugs+unsubscribe@googlegroups.com.
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/syzkaller-bugs/000000000000424989056d295959%40google.com.
> For more options, visit https://groups.google.com/d/optout.
^ permalink raw reply
* Re: [PATCH 3/4] cpsw_switchdev: add switchdev support files
From: kbuild test robot @ 2018-05-27 4:39 UTC (permalink / raw)
To: Ilias Apalodimas
Cc: kbuild-all, netdev, grygorii.strashko, ivan.khoronzhuk, nsekhar,
jiri, ivecera, francois.ozog, yogeshs, spatton, Ilias Apalodimas
In-Reply-To: <1527144984-31236-4-git-send-email-ilias.apalodimas@linaro.org>
[-- Attachment #1: Type: text/plain, Size: 1439 bytes --]
Hi Ilias,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on net/master]
[also build test ERROR on v4.17-rc6]
[cannot apply to net-next/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Ilias-Apalodimas/RFC-CPSW-switchdev-mode/20180527-102334
config: arm-omap2plus_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm
All errors (new ones prefixed by >>):
drivers/net/ethernet/ti/cpsw_switchdev.c: In function 'cpsw_port_switchdev_init':
>> drivers/net/ethernet/ti/cpsw_switchdev.c:298:8: error: 'struct net_device' has no member named 'switchdev_ops'; did you mean 'netdev_ops'?
ndev->switchdev_ops = &cpsw_port_switchdev_ops;
^~~~~~~~~~~~~
netdev_ops
vim +298 drivers/net/ethernet/ti/cpsw_switchdev.c
295
296 void cpsw_port_switchdev_init(struct net_device *ndev)
297 {
> 298 ndev->switchdev_ops = &cpsw_port_switchdev_ops;
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 33598 bytes --]
^ permalink raw reply
* Re: [bpf-next PATCH] bpf: sockhash fix race with bpf_tcp_close and map delete
From: John Fastabend @ 2018-05-27 4:36 UTC (permalink / raw)
To: Daniel Borkmann, ast; +Cc: netdev
In-Reply-To: <1a7bab54-809a-dae4-a0f7-ea1fab2e8c7a@iogearbox.net>
On 05/26/2018 01:30 AM, Daniel Borkmann wrote:
> Hi John,
>
> On 05/25/2018 07:37 PM, John Fastabend wrote:
>> syzbot reported two related splats, a use after free and null
>> pointer dereference, when a TCP socket is closed while the map is
>> also being removed.
>>
>> The psock keeps a reference to all map slots that have a reference
>> to the sock so that when the sock is closed we can clean up any
>> outstanding sock{map|hash} entries. This avoids pinning a sock
>> forever if the map owner fails to do proper cleanup. However, the
>> result is we have two paths that can free an entry in the map. Even
>> the comment in the sock{map|hash} tear down function, sock_hash_free()
>> notes this:
>>
>> At this point no update, lookup or delete operations can happen.
>> However, be aware we can still get a socket state event updates,
>> and data ready callbacks that reference the psock from sk_user_data.
>>
>> Both removal paths omitted taking the hash bucket lock resulting
>> in the case where we have two references that are in the process
>> of being free'd.
>>
>> Reported-by: syzbot+a761b81c211794fa1072@syzkaller.appspotmail.com
>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>
Fixes: 81110384441a ("bpf: sockmap, add hash map support")
^ permalink raw reply
* Re: [PATCH] net: netsec: reduce DMA mask to 40 bits
From: Jassi Brar @ 2018-05-27 4:33 UTC (permalink / raw)
To: Ard Biesheuvel
Cc: Robin Murphy, <netdev@vger.kernel.org>, David S. Miller,
Masahisa Kojima, Ilias Apalodimas, nd
In-Reply-To: <CAKv+Gu-pcODD6+e3uHL-gTOUbQcZrh1N5rvzMeVQkVqAYkPu6w@mail.gmail.com>
On 26 May 2018 at 11:46, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 26 May 2018 at 05:44, Jassi Brar <jaswinder.singh@linaro.org> wrote:
>> On 26 May 2018 at 08:56, Jassi Brar <jaswinder.singh@linaro.org> wrote:
>>> On 26 May 2018 at 01:07, Robin Murphy <robin.murphy@arm.com> wrote:
>>>> On Sat, 26 May 2018 00:33:05 +0530
>>>> Jassi Brar <jaswinder.singh@linaro.org> wrote:
>>>>
>>>>> On 25 May 2018 at 18:20, Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>>>> wrote:
>>>>> > The netsec network controller IP can drive 64 address bits for DMA,
>>>>> > and the DMA mask is set accordingly in the driver. However, the
>>>>> > SynQuacer SoC, which is the only silicon incorporating this IP at
>>>>> > the moment, integrates this IP in a manner that leaves address bits
>>>>> > [63:40] unconnected.
>>>>> >
>>>>> > Up until now, this has not resulted in any problems, given that the
>>>>> > DDR controller doesn't decode those bits to begin with. However,
>>>>> > recent firmware updates for platforms incorporating this SoC allow
>>>>> > the IOMMU to be enabled, which does decode address bits [47:40],
>>>>> > and allocates top down from the IOVA space, producing DMA addresses
>>>>> > that have bits set that have been left unconnected.
>>>>> >
>>>>> > Both the DT and ACPI (IORT) descriptions of the platform take this
>>>>> > into account, and only describe a DMA address space of 40 bits
>>>>> > (using either dma-ranges DT properties, or DMA address limits in
>>>>> > IORT named component nodes). However, even though our IOMMU and bus
>>>>> > layers may take such limitations into account by setting a narrower
>>>>> > DMA mask when creating the platform device, the netsec probe()
>>>>> > entrypoint follows the common practice of setting the DMA mask
>>>>> > unconditionally, according to the capabilities of the IP block itself
>>>>> > rather than to its integration into the chip.
>>>>> >
>>>>> > It is currently unclear what the correct fix is here. We could hack
>>>>> > around it by only setting the DMA mask if it deviates from its
>>>>> > default value of DMA_BIT_MASK(32). However, this makes it
>>>>> > impossible for the bus layer to use DMA_BIT_MASK(32) as the bus
>>>>> > limit, and so it appears that a more comprehensive approach is
>>>>> > required to take DMA limits imposed by the SoC as a whole into
>>>>> > account.
>>>>> >
>>>>> > In the mean time, let's limit the DMA mask to 40 bits. Given that
>>>>> > there is currently only one SoC that incorporates this IP, this is
>>>>> > a reasonable approach that can be backported to -stable and buys us
>>>>> > some time to come up with a proper fix going forward.
>>>>> >
>>>>> I am sure you already thought about it, but why not let the platform
>>>>> specify the bit mask for the driver (via some "bus-width" property),
>>>>> to override the default 64 bit mask?
>>>>
>>>> Because lack of a property to describe the integration is not the
>>>> problem. There are already at least two ways: the general DT/IORT
>>>> properties for describing DMA addressing - which it would be a bit
>>>> ungainly for a driver to parse for this reason, but not impossible -
>>> ....
>>>
>>>
>>>> and inferring it from a SoC-specific compatible - which is more
>>>> appropriate, and what we happen to be able to do here.
>>>>
>>> Sorry, I am not sure I follow. This patch changes from 64-bits default
>>> to 40-bits capability without checking for the parent SoC. If the next
>>> generation implements the full 64-bit or just 32-bit bus, we'll be
>>> back in the pit again. No?
>>>
>> Probably you meant we'll change the ethernet compatible string for
>> differently capable SoC. OK, but here it is more of integration issue
>> than controller version.
>>
>> Which makes me realise the extant compatible property for netsec is
>> not so correct (it embeds the platform name). So I am ok either way.
>>
>
> The platform in question has a dma-ranges DT property at the root
> level that only describes 40 bits' worth of DMA. Also, the ACPI
> description in the IORT table of the IOMMU integration of the netsec
> controller limits DMA to 40 bits. In the latter case, we actually
> enter netsec_probe() with the correct value already assigned to the
> DMA mask fields. (In the former case, the DMA limit is ignored
> entirely)
>
> In other words, we can already describe these SoC limitations and
> distinguish them from device limitations. The problem is that drivers
> ignore the existing values of DMA mask.
>
> Robin has volunteered to look into fixing this, but this cannot be
> done in a way that is suitable for -stable. In the mean time, we have
> a single platform using this network IP in the field that cannot
> upgrade its firmware to a version that describes the IOMMU, because
> the existing DMA layer code will start driving address bits that are
> correctly described as unconnected by the DT/ACPI tables.
>
> So as a workaround, until Robin fixes things properly, let's reduce
> the DMA mask to 40 bits.
>
Yeah no point in introducing another dt property if this hack is
temporary until the core is fixed.
FWIW ... Acked-by: Jassi Brar <jaswinder.singh@linaro.org>
Thanks.
^ permalink raw reply
* Re: [PATCH, net-next] qcom-emag: hide ACPI specific functions
From: Timur Tabi @ 2018-05-27 1:30 UTC (permalink / raw)
To: Arnd Bergmann, David S. Miller; +Cc: Hemanth Puranik, netdev, linux-kernel
In-Reply-To: <d625691f-a240-23ee-3751-d577c5e72423@codeaurora.org>
On 5/25/18 7:22 PM, Timur Tabi wrote:
> - phy->open = emac_sgmii_open;
> - phy->close = emac_sgmii_close;
> - phy->link_up = emac_sgmii_link_up;
> - phy->link_down = emac_sgmii_link_down;
>
> I'll take a look at it next week when I'm back in the office.
I posted a patch that fixes this problem and also retains device-tree
support.
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply
* [PATCH] net: qcom/emac: fix device tree initialization
From: Timur Tabi @ 2018-05-27 1:29 UTC (permalink / raw)
To: David S. Miller, Arnd Bergmann, Hemanth Puranik, netdev; +Cc: timur
Commit "net: qcom/emac: Encapsulate sgmii ops under one structure"
introduced the sgmii_ops structure, but did not correctly initialize
it on device tree platforms. This resulted in compiler warnings when
ACPI is not enabled.
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Timur Tabi <timur@codeaurora.org>
---
drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 562420b834df..e78e5db39458 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -273,6 +273,14 @@ static int emac_sgmii_common_link_change(struct emac_adapter *adpt, bool linkup)
return 0;
}
+static struct sgmii_ops fsm9900_ops = {
+ .init = emac_sgmii_init_fsm9900,
+ .open = emac_sgmii_common_open,
+ .close = emac_sgmii_common_close,
+ .link_change = emac_sgmii_common_link_change,
+ .reset = emac_sgmii_common_reset,
+};
+
static struct sgmii_ops qdf2432_ops = {
.init = emac_sgmii_init_qdf2432,
.open = emac_sgmii_common_open,
@@ -281,6 +289,7 @@ static int emac_sgmii_common_link_change(struct emac_adapter *adpt, bool linkup)
.reset = emac_sgmii_common_reset,
};
+#ifdef CONFIG_ACPI
static struct sgmii_ops qdf2400_ops = {
.init = emac_sgmii_init_qdf2400,
.open = emac_sgmii_common_open,
@@ -288,6 +297,7 @@ static int emac_sgmii_common_link_change(struct emac_adapter *adpt, bool linkup)
.link_change = emac_sgmii_common_link_change,
.reset = emac_sgmii_common_reset,
};
+#endif
static int emac_sgmii_acpi_match(struct device *dev, void *data)
{
@@ -335,11 +345,11 @@ static int emac_sgmii_acpi_match(struct device *dev, void *data)
static const struct of_device_id emac_sgmii_dt_match[] = {
{
.compatible = "qcom,fsm9900-emac-sgmii",
- .data = emac_sgmii_init_fsm9900,
+ .data = &fsm9900_ops,
},
{
.compatible = "qcom,qdf2432-emac-sgmii",
- .data = emac_sgmii_init_qdf2432,
+ .data = &qdf2432_ops,
},
{}
};
@@ -386,7 +396,7 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
goto error_put_device;
}
- phy->sgmii_ops->init = match->data;
+ phy->sgmii_ops = (struct sgmii_ops *)match->data;
}
/* Base address is the first address */
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply related
* [PATCH net] VSOCK: check sk state before receive
From: Hangbin Liu @ 2018-05-27 1:02 UTC (permalink / raw)
To: netdev; +Cc: Stefan Hajnoczi, Jorgen Hansen, David S. Miller, Hangbin Liu
Since vmci_transport_recv_dgram_cb is a callback function and we access the
socket struct without holding the lock here, there is a possibility that
sk has been released and we use it again. This may cause a NULL pointer
dereference later, while receiving. Here is the call trace:
[ 389.486319] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
[ 389.494148] PGD 0 P4D 0
[ 389.496687] Oops: 0000 [#1] SMP PTI
[ 389.500170] Modules linked in: vhost_net vmw_vsock_vmci_transport tun vsock vhost vmw_vmci tap iptable_security iptable_raw iptable_mangle iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_s
[ 389.510984] Failed to add new resource (handle=0x2:0x2711), error: -22
[ 389.543309] Failed to add new resource (handle=0x2:0x2711), error: -22
[ 389.570936] ttm drm crc32c_intel mptsas scsi_transport_sas serio_raw ata_piix mptscsih libata i2c_core mptbase bnx2 dm_mirror dm_region_hash dm_log dm_mod
[ 389.597899] CPU: 3 PID: 113 Comm: kworker/3:2 Tainted: G I 4.17.0-rc6.latest+ #25
[ 389.606673] Hardware name: Dell Inc. PowerEdge R710/0XDX06, BIOS 6.1.0 10/18/2011
[ 389.614158] Workqueue: events dg_delayed_dispatch [vmw_vmci]
[ 389.619820] RIP: 0010:selinux_socket_sock_rcv_skb+0x46/0x270
[ 389.625475] RSP: 0018:ffffbcb5416b7ce0 EFLAGS: 00010293
[ 389.630698] RAX: 0000000000000000 RBX: 0000000000000028 RCX: 0000000000000007
[ 389.637825] RDX: 0000000000000000 RSI: ffff94a29feec500 RDI: ffffbcb5416b7d18
[ 389.644953] RBP: ffff94a29bd9a640 R08: 0000000000000001 R09: ffff94a187c03080
[ 389.652080] R10: ffffbcb5416b7d80 R11: 0000000000000000 R12: ffffbcb5416b7d18
[ 389.659206] R13: ffff94a29feec500 R14: ffff94a2afda5e00 R15: 0ffff94a2afda5e0
[ 389.666336] FS: 0000000000000000(0000) GS:ffff94a2afd80000(0000) knlGS:0000000000000000
[ 389.674419] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 389.680160] CR2: 0000000000000010 CR3: 000000004320a003 CR4: 00000000000206e0
[ 389.687283] Call Trace:
[ 389.689738] ? __alloc_skb+0xa0/0x230
[ 389.693407] security_sock_rcv_skb+0x32/0x60
[ 389.697679] ? __alloc_skb+0xa0/0x230
[ 389.701343] sk_filter_trim_cap+0x4e/0x1f0
[ 389.705442] __sk_receive_skb+0x32/0x290
[ 389.709372] vmci_transport_recv_dgram_cb+0xa7/0xd0 [vmw_vsock_vmci_transport]
[ 389.716593] dg_delayed_dispatch+0x22/0x50 [vmw_vmci]
[ 389.721648] process_one_work+0x1f2/0x4a0
[ 389.725662] worker_thread+0x38/0x4c0
[ 389.729329] ? process_one_work+0x4a0/0x4a0
[ 389.733512] kthread+0x12f/0x150
[ 389.736743] ? kthread_create_worker_on_cpu+0x90/0x90
[ 389.741796] ret_from_fork+0x35/0x40
[ 389.745370] Code: 8b 04 25 28 00 00 00 48 89 44 24 70 31 c0 e8 42 15 db ff 0f b7 5d 10 48 8b 85 70 02 00 00 4c 8d 64 24 38 b9 07 00 00 00 4c 89 e7 <44> 8b 70 10 31 c0 41 89 df 41 83 e7 f7
[ 389.764342] RIP: selinux_socket_sock_rcv_skb+0x46/0x270 RSP: ffffbcb5416b7ce0
[ 389.771467] CR2: 0000000000000010
[ 389.774784] ---[ end trace e83d65291a15ae6a ]---
Fix it by checking sk state before using it.
Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
net/vmw_vsock/vmci_transport.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index a7a73ff..0d26040 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -612,6 +612,13 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
if (!vmci_transport_allow_dgram(vsk, dg->src.context))
return VMCI_ERROR_NO_ACCESS;
+ bh_lock_sock(sk);
+ if (sk->sk_state == TCP_CLOSE) {
+ bh_unlock_sock(sk);
+ return VMCI_ERROR_DATAGRAM_FAILED;
+ }
+ bh_unlock_sock(sk);
+
size = VMCI_DG_SIZE(dg);
/* Attach the packet to the socket's receive queue as an sk_buff. */
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox