Netdev List
 help / color / mirror / Atom feed
* [PATCH rdma-next v2 12/13] IB/mlx5: Add flow counters read support
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

Implements the flow counters read wrapper.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c    | 15 +++++++++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 13 ++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f846956833e5..f4da59e39c9e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3149,6 +3149,19 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev,
 	}
 }

+static int read_flow_counters(struct ib_device *ibdev,
+			      struct mlx5_read_counters_attr *read_attr)
+{
+	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+	return mlx5_fc_query(dev->mdev, fc->id,
+			     &read_attr->out[IB_COUNTER_PACKETS],
+			     &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters packets and bytes */
+#define FLOW_COUNTERS_NUM 2
 static int counters_set_description(struct ib_counters *counters,
 				    enum mlx5_ib_counters_type counters_type,
 				    u32 *desc_data,
@@ -3163,6 +3176,8 @@ static int counters_set_description(struct ib_counters *counters,

 	/* init the fields for the object */
 	mcounters->type = counters_type;
+	mcounters->read_counters = read_flow_counters;
+	mcounters->counters_num = FLOW_COUNTERS_NUM;
 	mcounters->ncounters = ncounters;
 	/* each counter entry have both description and index pair */
 	for (i = 0; i < ncounters * 2; i += 2) {
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 7313d3cd04f0..810557b5a5c1 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -814,6 +814,12 @@ struct mlx5_memic {
 	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
 };

+struct mlx5_read_counters_attr {
+	struct mlx5_fc *hw_cntrs_hndl;
+	u64 *out;
+	u32 flags;
+};
+
 enum mlx5_ib_counters_type {
 	MLX5_IB_COUNTERS_FLOW,
 };
@@ -821,7 +827,12 @@ enum mlx5_ib_counters_type {
 struct mlx5_ib_mcounters {
 	struct ib_counters ibcntrs;
 	enum mlx5_ib_counters_type type;
-	void *hw_cntrs_hndl;
+	/* number of counters supported for this counters type */
+	u32 counters_num;
+	struct mlx5_fc *hw_cntrs_hndl;
+	/* read function for this counters type */
+	int (*read_counters)(struct ib_device *ibdev,
+			     struct mlx5_read_counters_attr *read_attr);
 	/* max index set as part of create_flow */
 	u32 cntrs_max_index;
 	/* number of counters data entries (<description,index> pair) */

^ permalink raw reply related

* [PATCH mlx5-next v2 11/13] IB/mlx5: Add flow counters binding support
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

Associates a counters object with a flow when IB_FLOW_SPEC_ACTION_COUNT
is part of the flow specifications.

The user space placement of the counters, i.e. the location and
description of each counter given as (index, description) pairs, is
passed as private data of the counters flow specification.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c    | 223 ++++++++++++++++++++++++++++++++---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  15 +++
 include/linux/mlx5/fs.h              |   1 +
 include/uapi/rdma/mlx5-abi.h         |  14 +++
 4 files changed, 239 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 18bfee86fa52..f846956833e5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2472,7 +2472,7 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
 #define LAST_TUNNEL_FIELD tunnel_id
 #define LAST_FLOW_TAG_FIELD tag_id
 #define LAST_DROP_FIELD size
-#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters

 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -2836,6 +2836,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 		if (ret)
 			return ret;
 		break;
+	case IB_FLOW_SPEC_ACTION_COUNT:
+		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+					 LAST_COUNTERS_FIELD))
+			return -EOPNOTSUPP;
+
+		/* for now support only one counters spec per flow */
+		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+			return -EINVAL;
+
+		action->counters = ib_spec->flow_count.counters;
+		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2983,6 +2995,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev,
 	}
 }

+static void counters_clear_description(struct ib_counters *counters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	kfree(mcounters->counters_data);
+	mcounters->counters_data = NULL;
+	mcounters->cntrs_max_index = 0;
+	mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 {
 	struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
@@ -3002,8 +3025,11 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)

 	mlx5_del_flow_rules(handler->rule);
 	put_flow_table(dev, handler->prio, true);
-	mutex_unlock(&dev->flow_db->lock);
+	if (handler->ibcounters &&
+	    atomic_read(&handler->ibcounters->usecnt) == 1)
+		counters_clear_description(handler->ibcounters);

+	mutex_unlock(&dev->flow_db->lock);
 	kfree(handler);

 	return 0;
@@ -3123,22 +3149,128 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev,
 	}
 }

+static int counters_set_description(struct ib_counters *counters,
+				    enum mlx5_ib_counters_type counters_type,
+				    u32 *desc_data,
+				    u32 ncounters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+	u32 cntrs_max_index = 0;
+	int i;
+
+	if (counters_type != MLX5_IB_COUNTERS_FLOW)
+		return -EINVAL;
+
+	/* init the fields for the object */
+	mcounters->type = counters_type;
+	mcounters->ncounters = ncounters;
+	/* each counter entry have both description and index pair */
+	for (i = 0; i < ncounters * 2; i += 2) {
+		if (desc_data[i] > IB_COUNTER_BYTES)
+			return -EINVAL;
+
+		if (cntrs_max_index <= desc_data[i + 1])
+			cntrs_max_index = desc_data[i + 1] + 1;
+	}
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	mcounters->counters_data = desc_data;
+	mcounters->cntrs_max_index = cntrs_max_index;
+	mutex_unlock(&mcounters->mcntrs_mutex);
+
+	return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+static int flow_counters_set_data(struct ib_counters *ibcounters,
+				  struct mlx5_ib_create_flow *ucmd)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+	u32 *desc_data = NULL;
+	bool hw_hndl = false;
+	int ret = 0;
+
+	if (ucmd && ucmd->ncounters_data != 0) {
+		cntrs_data = ucmd->data;
+		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+			return -EINVAL;
+
+		desc_data = kcalloc(cntrs_data->ncounters,
+				    sizeof(u32) * 2,
+				    GFP_KERNEL);
+		if (!desc_data)
+			return  -ENOMEM;
+
+		if (copy_from_user(desc_data,
+				   u64_to_user_ptr(cntrs_data->counters_data),
+				   sizeof(u32) * 2 * cntrs_data->ncounters)) {
+				ret = -EFAULT;
+				goto free;
+		}
+	}
+
+	if (!mcounters->hw_cntrs_hndl) {
+		mcounters->hw_cntrs_hndl = mlx5_fc_create(
+			to_mdev(ibcounters->device)->mdev, false);
+		if (!mcounters->hw_cntrs_hndl) {
+			ret = -ENOMEM;
+			goto free;
+		}
+		hw_hndl = true;
+	}
+
+	if (desc_data) {
+		/* counters already bound to at least one flow */
+		if (mcounters->cntrs_max_index) {
+			ret = -EINVAL;
+			goto free_hndl;
+		}
+
+		ret = counters_set_description(ibcounters,
+					       MLX5_IB_COUNTERS_FLOW,
+					       desc_data,
+					       cntrs_data->ncounters);
+		if (ret)
+			goto free_hndl;
+
+	} else if (!mcounters->cntrs_max_index) {
+		/* counters not bound yet, must have udata passed */
+			ret = -EINVAL;
+			goto free_hndl;
+	}
+
+	return 0;
+
+free_hndl:
+	if (hw_hndl) {
+		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+				mcounters->hw_cntrs_hndl);
+		mcounters->hw_cntrs_hndl = NULL;
+	}
+free:
+	kfree(desc_data);
+	return ret;
+}
+
 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 						      struct mlx5_ib_flow_prio *ft_prio,
 						      const struct ib_flow_attr *flow_attr,
 						      struct mlx5_flow_destination *dst,
-						      u32 underlay_qpn)
+						      u32 underlay_qpn,
+						      struct mlx5_ib_create_flow *ucmd)
 {
 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
 	struct mlx5_ib_flow_handler *handler;
 	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
 	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_destination *rule_dst = dst;
+	struct mlx5_flow_destination dest_arr[2] = {};
+	struct mlx5_flow_destination *rule_dst = dest_arr;
 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
 	unsigned int spec_index;
 	u32 prev_type = 0;
 	int err = 0;
-	int dest_num = 1;
+	int dest_num = 0;
 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;

 	if (!is_valid_attr(dev->mdev, flow_attr))
@@ -3152,6 +3284,10 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	}

 	INIT_LIST_HEAD(&handler->list);
+	if (dst) {
+		memcpy(&dest_arr[0], dst, sizeof(*dst));
+		dest_num++;
+	}

 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 		err = parse_flow_attr(dev->mdev, spec->match_criteria,
@@ -3188,15 +3324,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 		goto free;
 	}

+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		err = flow_counters_set_data(flow_act.counters, ucmd);
+		if (err)
+			goto free;
+
+		handler->ibcounters = flow_act.counters;
+		dest_arr[dest_num].type =
+			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest_arr[dest_num].counter =
+			to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+		dest_num++;
+	}
+
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
-		rule_dst = NULL;
-		dest_num = 0;
+		if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+			rule_dst = NULL;
+			dest_num = 0;
+		}
 	} else {
 		if (is_egress)
 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 		else
 			flow_act.action |=
-				dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+				dest_num ?  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 					MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 	}

@@ -3222,8 +3373,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,

 	ft_prio->flow_table = ft;
 free:
-	if (err)
+	if (err && handler) {
+		if (handler->ibcounters &&
+		    atomic_read(&handler->ibcounters->usecnt) == 1)
+			counters_clear_description(handler->ibcounters);
 		kfree(handler);
+	}
 	kvfree(spec);
 	return err ? ERR_PTR(err) : handler;
 }
@@ -3233,7 +3388,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 						     const struct ib_flow_attr *flow_attr,
 						     struct mlx5_flow_destination *dst)
 {
-	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
 }

 static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
@@ -3373,12 +3528,43 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
 	struct mlx5_ib_flow_prio *ft_prio;
 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+	size_t min_ucmd_sz, required_ucmd_sz;
 	int err;
 	int underlay_qpn;

-	if (udata &&
-	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
-		return ERR_PTR(-EOPNOTSUPP);
+	if (udata && udata->inlen) {
+		min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+				sizeof(ucmd_hdr.reserved);
+		if (udata->inlen < min_ucmd_sz)
+			return ERR_PTR(-EOPNOTSUPP);
+
+		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+		if (err)
+			return ERR_PTR(err);
+
+		/* currently supports only one counters data */
+		if (ucmd_hdr.ncounters_data > 1)
+			return ERR_PTR(-EINVAL);
+
+		required_ucmd_sz = min_ucmd_sz +
+			sizeof(struct mlx5_ib_flow_counters_data) *
+			ucmd_hdr.ncounters_data;
+		if (udata->inlen > required_ucmd_sz &&
+		    !ib_is_udata_cleared(udata, required_ucmd_sz,
+					 udata->inlen - required_ucmd_sz))
+			return ERR_PTR(-EOPNOTSUPP);
+
+		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+		if (!ucmd)
+			return ERR_PTR(-ENOMEM);
+
+		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+		if (err) {
+			kfree(ucmd);
+			return ERR_PTR(err);
+		}
+	}

 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
 		return ERR_PTR(-ENOMEM);
@@ -3433,7 +3619,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 			underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
 					mqp->underlay_qpn : 0;
 			handler = _create_flow_rule(dev, ft_prio, flow_attr,
-						    dst, underlay_qpn);
+						    dst, underlay_qpn, ucmd);
 		}
 	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
@@ -3454,6 +3640,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,

 	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
+	kfree(ucmd);

 	return &handler->ibflow;

@@ -3464,6 +3651,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 unlock:
 	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
+	kfree(ucmd);
 	kfree(handler);
 	return ERR_PTR(err);
 }
@@ -5128,6 +5316,11 @@ static int mlx5_ib_destroy_counters(struct ib_counters *counters)
 {
 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

+	counters_clear_description(counters);
+	if (mcounters->hw_cntrs_hndl)
+		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+				mcounters->hw_cntrs_hndl);
+
 	kfree(mcounters);

 	return 0;
@@ -5142,6 +5335,8 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
 	if (!mcounters)
 		return ERR_PTR(-ENOMEM);

+	mutex_init(&mcounters->mcntrs_mutex);
+
 	return &mcounters->ibcntrs;
 }

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index fd27ec1aed08..7313d3cd04f0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -175,6 +175,7 @@ struct mlx5_ib_flow_handler {
 	struct ib_flow			ibflow;
 	struct mlx5_ib_flow_prio	*prio;
 	struct mlx5_flow_handle		*rule;
+	struct ib_counters		*ibcounters;
 };

 struct mlx5_ib_flow_db {
@@ -813,8 +814,22 @@ struct mlx5_memic {
 	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
 };

+enum mlx5_ib_counters_type {
+	MLX5_IB_COUNTERS_FLOW,
+};
+
 struct mlx5_ib_mcounters {
 	struct ib_counters ibcntrs;
+	enum mlx5_ib_counters_type type;
+	void *hw_cntrs_hndl;
+	/* max index set as part of create_flow */
+	u32 cntrs_max_index;
+	/* number of counters data entries (<description,index> pair) */
+	u32 ncounters;
+	/* counters data array for descriptions and indexes */
+	u32 *counters_data;
+	/* protects access to mcounters internal data */
+	struct mutex mcntrs_mutex;
 };

 static inline struct mlx5_ib_mcounters *
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 93aab0f055b4..4612e0ad688b 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -160,6 +160,7 @@ struct mlx5_flow_act {
 	u32 modify_id;
 	uintptr_t esp_id;
 	struct mlx5_fs_vlan vlan;
+	struct ib_counters *counters;
 };

 #define MLX5_DECLARE_FLOW_ACT(name) \
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 508ea8c82da7..ef3f430a7050 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -443,4 +443,18 @@ enum {
 enum {
 	MLX5_IB_CLOCK_INFO_V1              = 0,
 };
+
+struct mlx5_ib_flow_counters_data {
+	__aligned_u64   counters_data;
+	__u32   ncounters;
+	__u32   reserved;
+};
+
+struct mlx5_ib_create_flow {
+	__u32   ncounters_data;
+	__u32   reserved;
+	/* Following are counters data based on ncounters_data */
+	struct mlx5_ib_flow_counters_data data[];
+};
+
 #endif /* MLX5_ABI_USER_H */

^ permalink raw reply related

* [PATCH rdma-next v2 10/13] IB/mlx5: Add counters create and destroy support
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

This patch implements the device counters create and destroy APIs
and introduces some internal management structures.

Downstream patches in this series will add the functionality to
support flow counters binding and reading.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c    | 23 +++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 10 ++++++++++
 2 files changed, 33 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 59f86198eb3b..18bfee86fa52 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5124,6 +5124,27 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev)
 	uverbs_free_spec_tree(dev->ib_dev.specs_root);
 }

+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+	kfree(mcounters);
+
+	return 0;
+}
+
+static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
+						   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_mcounters *mcounters;
+
+	mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
+	if (!mcounters)
+		return ERR_PTR(-ENOMEM);
+
+	return &mcounters->ibcntrs;
+}
+
 void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
@@ -5367,6 +5388,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
 	dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
 	dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
+	dev->ib_dev.create_counters = mlx5_ib_create_counters;
+	dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;

 	err = init_node_data(dev);
 	if (err)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 49a1aa0ff429..fd27ec1aed08 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -813,6 +813,16 @@ struct mlx5_memic {
 	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
 };

+struct mlx5_ib_mcounters {
+	struct ib_counters ibcntrs;
+};
+
+static inline struct mlx5_ib_mcounters *
+to_mcounters(struct ib_counters *ibcntrs)
+{
+	return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
+}
+
 struct mlx5_ib_dev {
 	struct ib_device		ib_dev;
 	struct mlx5_core_dev		*mdev;

^ permalink raw reply related

* [PATCH rdma-next v2 09/13] IB/uverbs: Add support for flow counters
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

The struct ib_uverbs_flow_spec_action_count associates
a counters object with the flow.

After this association, the flow counters can be read via
the counters object.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs.h     |  1 +
 drivers/infiniband/core/uverbs_cmd.c | 81 +++++++++++++++++++++++++++++++-----
 include/uapi/rdma/ib_user_verbs.h    | 13 ++++++
 3 files changed, 84 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5b2461fa634d..c0d40fc3a53a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec {
 		struct ib_uverbs_flow_spec_action_tag	flow_tag;
 		struct ib_uverbs_flow_spec_action_drop	drop;
 		struct ib_uverbs_flow_spec_action_handle action;
+		struct ib_uverbs_flow_spec_action_count flow_count;
 	};
 };

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ddb9d79691be..3179a95c6f5e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2748,43 +2748,82 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 struct ib_uflow_resources {
 	size_t			max;
 	size_t			num;
-	struct ib_flow_action	*collection[0];
+	size_t			collection_num;
+	size_t			counters_num;
+	struct ib_counters	**counters;
+	struct ib_flow_action	**collection;
 };

 static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
 {
 	struct ib_uflow_resources *resources;

-	resources =
-		kmalloc(sizeof(*resources) +
-			num_specs * sizeof(*resources->collection), GFP_KERNEL);
+	resources = kzalloc(sizeof(*resources), GFP_KERNEL);

 	if (!resources)
-		return NULL;
+		goto err_res;
+
+	resources->counters =
+		kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
+
+	if (!resources->counters)
+		goto err_cnt;
+
+	resources->collection =
+		kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
+
+	if (!resources->collection)
+		goto err_collection;

-	resources->num = 0;
 	resources->max = num_specs;

 	return resources;
+
+err_collection:
+	kfree(resources->counters);
+err_cnt:
+	kfree(resources);
+err_res:
+	return NULL;
 }

 void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
 {
 	unsigned int i;

-	for (i = 0; i < uflow_res->num; i++)
+	for (i = 0; i < uflow_res->collection_num; i++)
 		atomic_dec(&uflow_res->collection[i]->usecnt);

+	for (i = 0; i < uflow_res->counters_num; i++)
+		atomic_dec(&uflow_res->counters[i]->usecnt);
+
+	kfree(uflow_res->collection);
+	kfree(uflow_res->counters);
 	kfree(uflow_res);
 }

 static void flow_resources_add(struct ib_uflow_resources *uflow_res,
-			       struct ib_flow_action *action)
+			       enum ib_flow_spec_type type,
+			       void *ibobj)
 {
 	WARN_ON(uflow_res->num >= uflow_res->max);

-	atomic_inc(&action->usecnt);
-	uflow_res->collection[uflow_res->num++] = action;
+	switch (type) {
+	case IB_FLOW_SPEC_ACTION_HANDLE:
+		atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
+		uflow_res->collection[uflow_res->collection_num++] =
+			(struct ib_flow_action *)ibobj;
+		break;
+	case IB_FLOW_SPEC_ACTION_COUNT:
+		atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
+		uflow_res->counters[uflow_res->counters_num++] =
+			(struct ib_counters *)ibobj;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	uflow_res->num++;
 }

 static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
@@ -2821,9 +2860,29 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
 			return -EINVAL;
 		ib_spec->action.size =
 			sizeof(struct ib_flow_spec_action_handle);
-		flow_resources_add(uflow_res, ib_spec->action.act);
+		flow_resources_add(uflow_res,
+				   IB_FLOW_SPEC_ACTION_HANDLE,
+				   ib_spec->action.act);
 		uobj_put_obj_read(ib_spec->action.act);
 		break;
+	case IB_FLOW_SPEC_ACTION_COUNT:
+		if (kern_spec->flow_count.size !=
+			sizeof(struct ib_uverbs_flow_spec_action_count))
+			return -EINVAL;
+		ib_spec->flow_count.counters =
+			uobj_get_obj_read(counters,
+					  UVERBS_OBJECT_COUNTERS,
+					  kern_spec->flow_count.handle,
+					  ucontext);
+		if (!ib_spec->flow_count.counters)
+			return -EINVAL;
+		ib_spec->flow_count.size =
+				sizeof(struct ib_flow_spec_action_count);
+		flow_resources_add(uflow_res,
+				   IB_FLOW_SPEC_ACTION_COUNT,
+				   ib_spec->flow_count.counters);
+		uobj_put_obj_read(ib_spec->flow_count.counters);
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 409507f83b91..4f9991de8e3a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -998,6 +998,19 @@ struct ib_uverbs_flow_spec_action_handle {
 	__u32			      reserved1;
 };

+struct ib_uverbs_flow_spec_action_count {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	__u32			      handle;
+	__u32			      reserved1;
+};
+
 struct ib_uverbs_flow_tunnel_filter {
 	__be32 tunnel_id;
 };

^ permalink raw reply related

* [PATCH rdma-next v2 08/13] IB/core: Add support for flow counters
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

A counters object can be attached to a flow on creation
by providing the counter specification action.

General counter descriptions which count packets and bytes are
introduced; downstream patches in this series will use them
as part of flow counters binding.

In addition, increase the number of supported flow specification
layers to 10, to account for the newly added count specification and
for the previously added drop specification.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/rdma/ib_verbs.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 80956b1c9f4d..3acf7a9fa452 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1859,9 +1859,10 @@ enum ib_flow_spec_type {
 	IB_FLOW_SPEC_ACTION_TAG         = 0x1000,
 	IB_FLOW_SPEC_ACTION_DROP        = 0x1001,
 	IB_FLOW_SPEC_ACTION_HANDLE	= 0x1002,
+	IB_FLOW_SPEC_ACTION_COUNT       = 0x1003,
 };
 #define IB_FLOW_SPEC_LAYER_MASK	0xF0
-#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 10

 /* Flow steering rule priority is set according to it's domain.
  * Lower domain value means higher priority.
@@ -2041,6 +2042,17 @@ struct ib_flow_spec_action_handle {
 	struct ib_flow_action	     *act;
 };

+enum ib_counters_description {
+	IB_COUNTER_PACKETS,
+	IB_COUNTER_BYTES,
+};
+
+struct ib_flow_spec_action_count {
+	enum ib_flow_spec_type type;
+	u16 size;
+	struct ib_counters *counters;
+};
+
 union ib_flow_spec {
 	struct {
 		u32			type;
@@ -2058,6 +2070,7 @@ union ib_flow_spec {
 	struct ib_flow_spec_action_tag  flow_tag;
 	struct ib_flow_spec_action_drop drop;
 	struct ib_flow_spec_action_handle action;
+	struct ib_flow_spec_action_count flow_count;
 };

 struct ib_flow_attr {

^ permalink raw reply related

* [PATCH mlx5-next v2 02/13] net/mlx5: Export flow counter related API
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

Exports counters API to be used in both IB and EN.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  | 23 ----------------------
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  |  3 +++
 include/linux/mlx5/fs.h                            | 22 +++++++++++++++++++++
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index b6da322a8016..40992aed1791 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -131,29 +131,6 @@ struct mlx5_flow_table {
 	struct rhltable			fgs_hash;
 };

-struct mlx5_fc_cache {
-	u64 packets;
-	u64 bytes;
-	u64 lastuse;
-};
-
-struct mlx5_fc {
-	struct rb_node node;
-	struct list_head list;
-
-	/* last{packets,bytes} members are used when calculating the delta since
-	 * last reading
-	 */
-	u64 lastpackets;
-	u64 lastbytes;
-
-	u32 id;
-	bool deleted;
-	bool aging;
-
-	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
-};
-
 struct mlx5_ft_underlay_qp {
 	struct list_head list;
 	u32 qpn;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index b7ab929d5f8e..10f407843e03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -243,6 +243,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)

 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(mlx5_fc_create);

 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 {
@@ -260,6 +261,7 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 	mlx5_cmd_fc_free(dev, counter->id);
 	kfree(counter);
 }
+EXPORT_SYMBOL(mlx5_fc_destroy);

 int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
@@ -317,6 +319,7 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, u16 id,
 {
 	return mlx5_cmd_fc_query(dev, id, packets, bytes);
 }
+EXPORT_SYMBOL(mlx5_fc_query);

 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse)
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 9f4d32e41c06..93aab0f055b4 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -186,6 +186,28 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
+int mlx5_fc_query(struct mlx5_core_dev *dev, u16 id,
+		  u64 *packets, u64 *bytes);
+
+struct mlx5_fc_cache {
+	u64 packets;
+	u64 bytes;
+	u64 lastuse;
+};
+
+struct mlx5_fc {
+	struct rb_node node;
+	struct list_head list;
+
+	u64 lastpackets;
+	u64 lastbytes;
+
+	u32 id;
+	bool deleted;
+	bool aging;
+	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);

^ permalink raw reply related

* [PATCH rdma-next v2 06/13] IB/uverbs: Add read counters support
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

This patch exposes the read counters verb to user space
applications.
With that verb, the user can read the hardware counters which
are associated with the counters object.

The application needs to provide sufficient memory to
hold the statistics.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../infiniband/core/uverbs_std_types_counters.c    | 59 +++++++++++++++++++++-
 include/uapi/rdma/ib_user_ioctl_cmds.h             |  7 +++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index a5bc50ceee13..b35fcd3718c8 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -80,6 +80,49 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de
 	return ret;
 }

+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
+						       struct ib_uverbs_file *file,
+						       struct uverbs_attr_bundle *attrs)
+{
+	struct ib_counters_read_attr read_attr = {};
+	const struct uverbs_attr *uattr;
+	struct ib_counters *counters =
+		uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
+	int ret;
+
+	if (!ib_dev->read_counters)
+		return -EOPNOTSUPP;
+
+	if (!atomic_read(&counters->usecnt))
+		return -EINVAL;
+
+	ret = uverbs_copy_from(&read_attr.flags, attrs,
+			       UVERBS_ATTR_READ_COUNTERS_FLAGS);
+	if (ret)
+		return ret;
+
+	uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
+	read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
+	read_attr.counters_buff = kcalloc(read_attr.ncounters,
+					  sizeof(u64), GFP_KERNEL);
+	if (!read_attr.counters_buff)
+		return -ENOMEM;
+
+	ret = ib_dev->read_counters(counters,
+				    &read_attr,
+				    attrs);
+	if (ret)
+		goto err_read;
+
+	ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
+			     read_attr.counters_buff,
+			     read_attr.ncounters * sizeof(u64));
+
+err_read:
+	kfree(read_attr.counters_buff);
+	return ret;
+}
+
 static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
 			 UVERBS_OBJECT_COUNTERS,
@@ -93,8 +136,22 @@ static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));

+#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
+	&UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+			 UVERBS_OBJECT_COUNTERS,
+			 UVERBS_ACCESS_READ,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+			     UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+			    UVERBS_ATTR_TYPE(__u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
 			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
-			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY));
+			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
+			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));

diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index c28ce62d2e40..888ac5975a6c 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -140,9 +140,16 @@ enum uverbs_attrs_destroy_counters_cmd_attr_ids {
 	UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
 };

+enum uverbs_attrs_read_counters_cmd_attr_ids {
+	UVERBS_ATTR_READ_COUNTERS_HANDLE,
+	UVERBS_ATTR_READ_COUNTERS_BUFF,
+	UVERBS_ATTR_READ_COUNTERS_FLAGS,
+};
+
 enum uverbs_methods_actions_counters_ops {
 	UVERBS_METHOD_COUNTERS_CREATE,
 	UVERBS_METHOD_COUNTERS_DESTROY,
+	UVERBS_METHOD_COUNTERS_READ,
 };

 #endif

^ permalink raw reply related

* [PATCH rdma-next v2 05/13] IB/core: Introduce counters read verb
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

The user supplies a counters instance and a reference to an output
array of uint64_t.
The driver reads the hardware counters values and writes them to
the output index location in the user supplied array.
All counters values are represented as uint64_t types.

To be able to successfully read the data the counters must be
first bound to an IB object.

Downstream patches will present binding method for
flow counters.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/rdma/ib_verbs.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ce3d39725966..f6bd3b97b971 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2219,6 +2219,17 @@ struct ib_counters {
 	atomic_t	usecnt;
 };

+enum ib_read_counters_flags {
+	/* prefer read values from driver cache */
+	IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
+};
+
+struct ib_counters_read_attr {
+	u64	*counters_buff;
+	u32	ncounters;
+	u32	flags; /* use enum ib_read_counters_flags */
+};
+
 struct uverbs_attr_bundle;

 struct ib_device {
@@ -2493,6 +2504,9 @@ struct ib_device {
 	struct ib_counters *	(*create_counters)(struct ib_device *device,
 						   struct uverbs_attr_bundle *attrs);
 	int	(*destroy_counters)(struct ib_counters	*counters);
+	int	(*read_counters)(struct ib_counters *counters,
+				 struct ib_counters_read_attr *counters_read_attr,
+				 struct uverbs_attr_bundle *attrs);

 	/**
 	 * rdma netdev operation

^ permalink raw reply related

* [PATCH rdma-next v2 04/13] IB/uverbs: Add create/destroy counters support
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

A user space application which uses counters functionality
is expected to allocate/release the counters resources by
calling create/destroy verbs and in turn get a unique handle
that can be used to attach the counters to its counted type.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/Makefile                   |   2 +-
 drivers/infiniband/core/uverbs.h                   |   1 +
 drivers/infiniband/core/uverbs_std_types.c         |   3 +-
 .../infiniband/core/uverbs_std_types_counters.c    | 100 +++++++++++++++++++++
 include/uapi/rdma/ib_user_ioctl_cmds.h             |  14 +++
 5 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/core/uverbs_std_types_counters.c

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 8d42373a2d8a..61667705d746 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -37,4 +37,4 @@ ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
 				rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
 				uverbs_ioctl_merge.o uverbs_std_types_cq.o \
 				uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
-				uverbs_std_types_mr.o
+				uverbs_std_types_mr.o uverbs_std_types_counters.o
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index cfb51618ab7a..5b2461fa634d 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -287,6 +287,7 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
 extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
 extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
 extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);

 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 569f48bd821e..b570acbd94af 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -302,7 +302,8 @@ static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
 				  &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
 				  &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
 				  &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_DM));
+				  &UVERBS_OBJECT(UVERBS_OBJECT_DM),
+				  &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));

 const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
 {
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
new file mode 100644
index 000000000000..a5bc50ceee13
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_counters(struct ib_uobject *uobject,
+				enum rdma_remove_reason why)
+{
+	struct ib_counters *counters = uobject->object;
+
+	if (why == RDMA_REMOVE_DESTROY &&
+	    atomic_read(&counters->usecnt))
+		return -EBUSY;
+
+	return counters->device->destroy_counters(counters);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev,
+							 struct ib_uverbs_file *file,
+							 struct uverbs_attr_bundle *attrs)
+{
+	struct ib_counters *counters;
+	struct ib_uobject *uobj;
+	int ret;
+
+	/*
+	 * This check should be removed once the infrastructure
+	 * have the ability to remove methods from parse tree once
+	 * such condition is met.
+	 */
+	if (!ib_dev->create_counters)
+		return -EOPNOTSUPP;
+
+	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+	counters = ib_dev->create_counters(ib_dev, attrs);
+	if (IS_ERR(counters)) {
+		ret = PTR_ERR(counters);
+		goto err_create_counters;
+	}
+
+	counters->device = ib_dev;
+	counters->uobject = uobj;
+	uobj->object = counters;
+	atomic_set(&counters->usecnt, 0);
+
+	return 0;
+
+err_create_counters:
+	return ret;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
+	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+			 UVERBS_OBJECT_COUNTERS,
+			 UVERBS_ACCESS_NEW,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
+	uverbs_destroy_def_handler,
+	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+			 UVERBS_OBJECT_COUNTERS,
+			 UVERBS_ACCESS_DESTROY,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
+			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
+			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
+			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY));
+
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 83e3890eef20..c28ce62d2e40 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -55,6 +55,7 @@ enum uverbs_default_objects {
 	UVERBS_OBJECT_WQ,
 	UVERBS_OBJECT_FLOW_ACTION,
 	UVERBS_OBJECT_DM,
+	UVERBS_OBJECT_COUNTERS,
 };

 enum {
@@ -131,4 +132,17 @@ enum uverbs_methods_mr {
 	UVERBS_METHOD_DM_MR_REG,
 };

+enum uverbs_attrs_create_counters_cmd_attr_ids {
+	UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+};
+
+enum uverbs_attrs_destroy_counters_cmd_attr_ids {
+	UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+};
+
+enum uverbs_methods_actions_counters_ops {
+	UVERBS_METHOD_COUNTERS_CREATE,
+	UVERBS_METHOD_COUNTERS_DESTROY,
+};
+
 #endif

^ permalink raw reply related

* [PATCH rdma-next v2 03/13] IB/core: Introduce counters object and its create/destroy
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Raed Salem <raeds@mellanox.com>

A verbs application may need to get statistics and info on various
aspects of a verb object (e.g. Flow, QP, ...). In the general case the
application will state which object's counters it is interested in
(we refer to this action as attach), bind this new counters object
to the appropriate verb object and at a later stage read their values
using the counters object.

This series introduces a general API for counters object that may
accumulate any ib object counters type, bound and read on demand.

Counters instance is allocated on an IB context and belongs to
that context.
Upon successful creation the counters can be bound to a verbs
object so that hardware counter instances can be created and read.

Downstream patches in this series will introduce the attach, bind
and the read functionality.

Counters instance can be de-allocated, upon successful
destruction the related hardware resources are released.

Prior to the destroy call the user must first make sure that the counters
object is not being used by any IB object, e.g. not attached to any of its
counted types; otherwise an EBUSY error is returned.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/rdma/ib_verbs.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e849bd0fc618..ce3d39725966 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2212,6 +2212,13 @@ struct ib_port_pkey_list {
 	struct list_head              pkey_list;
 };

+struct ib_counters {
+	struct ib_device	*device;
+	struct ib_uobject	*uobject;
+	/* num of objects attached */
+	atomic_t	usecnt;
+};
+
 struct uverbs_attr_bundle;

 struct ib_device {
@@ -2483,6 +2490,10 @@ struct ib_device {
 	struct ib_mr *             (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
 						struct ib_dm_mr_attr *attr,
 						struct uverbs_attr_bundle *attrs);
+	struct ib_counters *	(*create_counters)(struct ib_device *device,
+						   struct uverbs_attr_bundle *attrs);
+	int	(*destroy_counters)(struct ib_counters	*counters);
+
 	/**
 	 * rdma netdev operation
 	 *

^ permalink raw reply related

* [PATCH rdma-next v2 01/13] IB/uverbs: Add an ib_uobject getter to ioctl() infrastructure
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180529130917.13592-1-leon@kernel.org>

From: Matan Barak <matanb@mellanox.com>

Previously, the user had to dig inside the attribute to get the uobject.
Add a helper function that correctly extracts it (and does the required
checks) for him/her.

Tested-by: Michael Guralnik <michaelgur@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_std_types_cq.c      | 23 +++++++++++-----------
 .../infiniband/core/uverbs_std_types_flow_action.c |  4 ++--
 include/rdma/uverbs_ioctl.h                        | 11 +++++++++++
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index b0dbae9dd0d7..3d293d01afea 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -65,7 +65,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	struct ib_cq_init_attr attr = {};
 	struct ib_cq                   *cq;
 	struct ib_uverbs_completion_event_file    *ev_file = NULL;
-	const struct uverbs_attr *ev_file_attr;
 	struct ib_uobject *ev_file_uobj;

 	if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
@@ -87,10 +86,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 						UVERBS_ATTR_CREATE_CQ_FLAGS)))
 		return -EFAULT;

-	ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
-	if (!IS_ERR(ev_file_attr)) {
-		ev_file_uobj = ev_file_attr->obj_attr.uobject;
-
+	ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+	if (!IS_ERR(ev_file_uobj)) {
 		ev_file = container_of(ev_file_uobj,
 				       struct ib_uverbs_completion_event_file,
 				       uobj_file.uobj);
@@ -102,8 +99,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 		goto err_event_file;
 	}

-	obj = container_of(uverbs_attr_get(attrs,
-					   UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject,
+	obj = container_of(uverbs_attr_get_uobject(attrs,
+						   UVERBS_ATTR_CREATE_CQ_HANDLE),
 			   typeof(*obj), uobject);
 	obj->uverbs_file	   = ucontext->ufile;
 	obj->comp_events_reported  = 0;
@@ -170,13 +167,17 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
 						    struct ib_uverbs_file *file,
 						    struct uverbs_attr_bundle *attrs)
 {
-	struct ib_uverbs_destroy_cq_resp resp;
 	struct ib_uobject *uobj =
-		uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject;
-	struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
-						 uobject);
+		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
+	struct ib_uverbs_destroy_cq_resp resp;
+	struct ib_ucq_object *obj;
 	int ret;

+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
+
+	obj = container_of(uobj, struct ib_ucq_object, uobject);
+
 	if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
 		return -EOPNOTSUPP;

diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index b4f016dfa23d..a7be51cf2e42 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -320,7 +320,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
 		return ret;

 	/* No need to check as this attribute is marked as MANDATORY */
-	uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
 	action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
 	if (IS_ERR(action))
 		return PTR_ERR(action);
@@ -350,7 +350,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
 	if (ret)
 		return ret;

-	uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
 	action = uobj->object;

 	if (action->type != IB_FLOW_ACTION_ESP)
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 4a4201d997a7..7ac6271a5ee0 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -420,6 +420,17 @@ static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_b
 	return uobj->object;
 }

+static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_attr_bundle *attrs_bundle,
+							 u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return ERR_CAST(attr);
+
+	return attr->obj_attr.uobject;
+}
+
 static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
 				 size_t idx, const void *from, size_t size)
 {

^ permalink raw reply related

* [PATCH rdma-next v2 00/13] Verbs flow counters support
From: Leon Romanovsky @ 2018-05-29 13:09 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev

From: Leon Romanovsky <leonro@mellanox.com>

Changelog
v1->v2:
 * Removed conversion from struct mlx5_fc* to void*
 * Fixed one place with double space in it
 * Balanced release of hardware handler in case of counters allocation failure
 * Added Tested-by
 * Minimize time spent holding mutex lock
 * Fixed deadlock caused by nested lock in error path
 * Protect from handler pointer dereference in the error paths

Not changed: mlx5-abi.h

v0->v1:
 * Decouple from DevX submission
 * Use uverbs_attr_get_obj at counters read method
 * Added define for max read buffer size (MAX_COUNTERS_BUFF_SIZE)
 * Removed the struct mlx5_ib_flow_counter basic_flow_cnts and
   the related structs used, used define instead
 * Took Matan's patch from DevX
 * uverbs_free_counters removed void* casting
 * Added check to bound ncounters value (added define)
 * Changed user supplied data buffer structure to be array of
   struct <desc,index> pair (applied this change to user space also)

Not changed:
 * UAPI files
 * Addition of uhw to flow

Thanks

----------------------------------------------------------------------
>From Raed:

This series comes to allow user space applications to monitor real time
traffic activity and events of the verbs objects it manages, e.g.:
ibv_qp, ibv_wq, ibv_flow.

This API enables generic counters creation and defines their mapping
to an associated verbs object; the current mlx5 driver uses
this API for flow counters.

With this API, an application can monitor the entire life cycle of
object activity, defined here as a static counters attachment.
This API also allows dynamic counters monitoring of measurement points
for a partial period in the verbs object life cycle.

In addition it presents the implementation of the generic counters interface.

This will be achieved by extending flow creation by adding a new flow count
specification type which allows the user to associate a previously created
flow counters using the generic verbs counters interface to the created flow,
once associated the user could read statistics by using the read function of
the generic counters interface.

The API includes:
1. create and destroy API for new counters objects
2. read the counters values from HW

Note:
Attaching API to allow application to define the measurement points per objects
is a user space only API and this data is passed to kernel when the counted
object (e.g. flow) is created with the counters object.

Thanks


Matan Barak (2):
  IB/uverbs: Add an ib_uobject getter to ioctl() infrastructure
  IB/core: Support passing uhw for create_flow

Raed Salem (11):
  net/mlx5: Export flow counter related API
  IB/core: Introduce counters object and its create/destroy
  IB/uverbs: Add create/destroy counters support
  IB/core: Introduce counters read verb
  IB/uverbs: Add read counters support
  IB/core: Add support for flow counters
  IB/uverbs: Add support for flow counters
  IB/mlx5: Add counters create and destroy support
  IB/mlx5: Add flow counters binding support
  IB/mlx5: Add flow counters read support
  IB/mlx5: Add counters read support

 drivers/infiniband/core/Makefile                   |   2 +-
 drivers/infiniband/core/uverbs.h                   |   2 +
 drivers/infiniband/core/uverbs_cmd.c               |  88 +++++-
 drivers/infiniband/core/uverbs_std_types.c         |   3 +-
 .../infiniband/core/uverbs_std_types_counters.c    | 157 +++++++++++
 drivers/infiniband/core/uverbs_std_types_cq.c      |  23 +-
 .../infiniband/core/uverbs_std_types_flow_action.c |   4 +-
 drivers/infiniband/core/verbs.c                    |   2 +-
 drivers/infiniband/hw/mlx4/main.c                  |   6 +-
 drivers/infiniband/hw/mlx5/main.c                  | 305 ++++++++++++++++++++-
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |  36 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  23 --
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  |   3 +
 include/linux/mlx5/fs.h                            |  23 ++
 include/rdma/ib_verbs.h                            |  43 ++-
 include/rdma/uverbs_ioctl.h                        |  11 +
 include/uapi/rdma/ib_user_ioctl_cmds.h             |  21 ++
 include/uapi/rdma/ib_user_verbs.h                  |  13 +
 include/uapi/rdma/mlx5-abi.h                       |  14 +
 19 files changed, 713 insertions(+), 66 deletions(-)
 create mode 100644 drivers/infiniband/core/uverbs_std_types_counters.c

^ permalink raw reply

* Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs
From: Michael Tuexen @ 2018-05-29 13:06 UTC (permalink / raw)
  To: Neil Horman
  Cc: Marcelo Ricardo Leitner, Dmitry Vyukov, Xin Long, network dev,
	linux-sctp, David Miller, David Ahern, Eric Dumazet, syzkaller
In-Reply-To: <20180529114111.GA24144@hmswarspite.think-freely.org>

> On 29. May 2018, at 13:41, Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> On Mon, May 28, 2018 at 04:43:15PM -0300, Marcelo Ricardo Leitner wrote:
>> On Sat, May 26, 2018 at 09:01:00PM -0400, Neil Horman wrote:
>>> On Sat, May 26, 2018 at 05:50:39PM +0200, Dmitry Vyukov wrote:
>>>> On Sat, May 26, 2018 at 5:42 PM, Michael Tuexen
>>>> <michael.tuexen@lurchi.franken.de> wrote:
>>>>>> On 25. May 2018, at 21:13, Neil Horman <nhorman@tuxdriver.com> wrote:
>>>>>> 
>>>>>> On Sat, May 26, 2018 at 01:41:02AM +0800, Xin Long wrote:
>>>>>>> syzbot reported a rcu_sched self-detected stall on CPU which is caused
>>>>>>> by too small value set on rto_min with SCTP_RTOINFO sockopt. With this
>>>>>>> value, hb_timer will get stuck there, as in its timer handler it starts
>>>>>>> this timer again with this value, then goes to the timer handler again.
>>>>>>> 
>>>>>>> This problem is there since very beginning, and thanks to Eric for the
>>>>>>> reproducer shared from a syzbot mail.
>>>>>>> 
>>>>>>> This patch fixes it by not allowing to set rto_min with a value below
>>>>>>> 200 msecs, which is based on TCP's, by either setsockopt or sysctl.
>>>>>>> 
>>>>>>> Reported-by: syzbot+3dcd59a1f907245f891f@syzkaller.appspotmail.com
>>>>>>> Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
>>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>>>>>> ---
>>>>>>> include/net/sctp/constants.h |  1 +
>>>>>>> net/sctp/socket.c            | 10 +++++++---
>>>>>>> net/sctp/sysctl.c            |  3 ++-
>>>>>>> 3 files changed, 10 insertions(+), 4 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
>>>>>>> index 20ff237..2ee7a7b 100644
>>>>>>> --- a/include/net/sctp/constants.h
>>>>>>> +++ b/include/net/sctp/constants.h
>>>>>>> @@ -277,6 +277,7 @@ enum { SCTP_MAX_GABS = 16 };
>>>>>>> #define SCTP_RTO_INITIAL     (3 * 1000)
>>>>>>> #define SCTP_RTO_MIN         (1 * 1000)
>>>>>>> #define SCTP_RTO_MAX         (60 * 1000)
>>>>>>> +#define SCTP_RTO_HARD_MIN   200
>>>>>>> 
>>>>>>> #define SCTP_RTO_ALPHA          3   /* 1/8 when converted to right shifts. */
>>>>>>> #define SCTP_RTO_BETA           2   /* 1/4 when converted to right shifts. */
>>>>>>> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
>>>>>>> index ae7e7c6..6ef12c7 100644
>>>>>>> --- a/net/sctp/socket.c
>>>>>>> +++ b/net/sctp/socket.c
>>>>>>> @@ -3029,7 +3029,8 @@ static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
>>>>>>> * be changed.
>>>>>>> *
>>>>>>> */
>>>>>>> -static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigned int optlen)
>>>>>>> +static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval,
>>>>>>> +                               unsigned int optlen)
>>>>>>> {
>>>>>>>     struct sctp_rtoinfo rtoinfo;
>>>>>>>     struct sctp_association *asoc;
>>>>>>> @@ -3056,10 +3057,13 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne
>>>>>>>     else
>>>>>>>             rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max;
>>>>>>> 
>>>>>>> -    if (rto_min)
>>>>>>> +    if (rto_min) {
>>>>>>> +            if (rto_min < SCTP_RTO_HARD_MIN)
>>>>>>> +                    return -EINVAL;
>>>>>>>             rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min;
>>>>>>> -    else
>>>>>>> +    } else {
>>>>>>>             rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min;
>>>>>>> +    }
>>>>>>> 
>>>>>>>     if (rto_min > rto_max)
>>>>>>>             return -EINVAL;
>>>>>>> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
>>>>>>> index 33ca5b7..7ec854a 100644
>>>>>>> --- a/net/sctp/sysctl.c
>>>>>>> +++ b/net/sctp/sysctl.c
>>>>>>> @@ -52,6 +52,7 @@ static int rto_alpha_min = 0;
>>>>>>> static int rto_beta_min = 0;
>>>>>>> static int rto_alpha_max = 1000;
>>>>>>> static int rto_beta_max = 1000;
>>>>>>> +static int rto_hard_min = SCTP_RTO_HARD_MIN;
>>>>>>> 
>>>>>>> static unsigned long max_autoclose_min = 0;
>>>>>>> static unsigned long max_autoclose_max =
>>>>>>> @@ -116,7 +117,7 @@ static struct ctl_table sctp_net_table[] = {
>>>>>>>             .maxlen         = sizeof(unsigned int),
>>>>>>>             .mode           = 0644,
>>>>>>>             .proc_handler   = proc_sctp_do_rto_min,
>>>>>>> -            .extra1         = &one,
>>>>>>> +            .extra1         = &rto_hard_min,
>>>>>>>             .extra2         = &init_net.sctp.rto_max
>>>>>>>     },
>>>>>>>     {
>>>>>>> --
>>>>>>> 2.1.0
>>>>>>> 
>>>>>>> --
>>>>>>> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
>>>>>>> the body of a message to majordomo@vger.kernel.org
>>>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>>>>> 
>>>>>> Patch looks fine, you probably want to note this hard minimum in man(7) sctp as
>>>>>> well
>>>>>> 
>>>>> I'm aware of some signalling networks which use RTO.min of smaller values than 200ms.
>>>>> So could this be reduced?
>>>> 
>>>> Hi Michael,
>>>> 
>>>> What value do they use?
>>>> 
>>>> Xin, Neil, is there more principled way of ensuring that a timer won't
>>>> cause a hard CPU stall? There are slow machines and there are slow
>>>> kernels (in particular syzbot kernel has tons of debug configs
>>>> enabled). 200ms _should_ not cause problems because we did not see
>>>> them with tcp. But it's hard to say what's the low limit as we are
>>>> trying to put a hard upper bound on execution time of a complex
>>>> section of code. Is there something like cond_resched for timers?
>>> Unfortunately, there's not really a way to do conditional rescheduling of timers,
>>> additionally, we have a problem because the timer is reset as a side effect of
>>> the SCTP state machine, and so the execution time between timer updates has a
>>> significant amount of jitter (meaning its a pretty hard value to calibrate,
>>> unless you just select a 'safe' large value for the floor).
>>> 
>>> What we could do (though this might impact the protocol function) is change
>>> the timer update side effects to simply set a flag, and consistently update the
>>> timers on exit from sctp_do_sm, so they don't re-arm until all state machine
>>> processing is complete.  Anyone have any thoughts on that?
>> 
>> I was reviewing all this again and I'm thinking that we are missing
>> the real point. With the parameters that reproducer [1] has, setting
>> those very low RTO parameters, it causes the timer to actually
>> busyloop on the heartbeats, as Xin had explained.
>> 
>> But the thing is, it busy loops not just because RTO is too low, but
>> because hbinterval was not accounted for.
>> 
>> /* What is the next timeout value for this transport? */
>> unsigned long sctp_transport_timeout(struct sctp_transport *trans)
>> {
>>        /* RTO + timer slack +/- 50% of RTO */
>>        unsigned long timeout = trans->rto >> 1;  <-- [a]
>> 
>>        if (trans->state != SCTP_UNCONFIRMED &&
>>            trans->state != SCTP_PF)             <--- [2]
>>                timeout += trans->hbinterval;
>> 
>>        return timeout;
>> }
>> 
>> The if() in [2] is to speed up path verification before using them, as
>> per the commit changelog. Secondary paths added on processing the
>> cookie are created with status SCTP_UNCONFIRMED, and HB timers are
>> started in the sequence:
>> sctp_sf_do_5_1D_ce
>>   -> sctp_process_init
>>     |> sctp_process_param
>>     | -> sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)
>>     '> sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
>> 
>> which starts the timer using only the small RTO for secondary paths:
>> static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds,
>>                                     struct sctp_association *asoc)
>> {
>>        struct sctp_transport *t;
>> 
>>        /* Start a heartbeat timer for each transport on the association.
>>         * hold a reference on the transport to make sure none of
>>         * the needed data structures go away.
>>         */
>>        list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
>>                sctp_transport_reset_hb_timer(t);
>> }
>> 
>> But if the system is too busy generating HBs, it likely won't process
>> incoming HB ACKs, which would stop the loop as it would mark the
>> transport as Active.
>> 
>> I'm now thinking a better fix would be to have a specific way to
>> kickstart these initial heartbeats, and then always use hbinterval on
>> subsequent ones.
>> 
> I like the idea, but I don't think we can just use the hbinterval to set the
> timeout.  That said, it seems like we should always be using the HB interval,
> not just on unconfirmed or partially failed transports.  From the RFC:
> 
> On an idle destination address that is allowed to heartbeat, it is
>   recommended that a HEARTBEAT chunk is sent once per RTO of that
>   destination address plus the protocol parameter 'HB.interval', with
>   jittering of +/- 50% of the RTO value, and exponential backoff of the
>   RTO if the previous HEARTBEAT is unanswered
Aren't we talking about the path confirmation procedure?
This is described in https://tools.ietf.org/html/rfc4960#section-5.4
where it is stated:

   In each RTO, a probe may be sent on an active UNCONFIRMED path in an
   attempt to move it to the CONFIRMED state.  If during this probing
   the path becomes inactive, this rate is lowered to the normal
   HEARTBEAT rate.  At the expiration of the RTO timer, the error
   counter of any path that was probed but not CONFIRMED is incremented
   by one and subjected to path failure detection, as defined in Section 8.2.
   When probing UNCONFIRMED addresses, however, the association
   overall error count is NOT incremented.

So during path confirmation there is no requirement to add HB.interval.

Best regards
Michael
> 
> It seems like we should be adding it to the timer expiration universally.  By my
> read, we've never done this quite right.  And yes, I agree, if we account this
> properly, we will avoid this issue.
> 
> It's also probably important to note here that, like RTO.min currently, there is
> no hard floor to the heartbeat interval, and the RFC is silent on what it should
> be.  So it would be possible to still find ourselves in this situation if we set
> the interval to 0 from userspace.  Is it worth considering a floor on the
> minimum hb interval if the rto is to have no floor?
> 
> Neil
> 
> 
>> This would not only fix the issue, but also improve the time we need
>> to identify the transports as Active upon association start, which is
>> currently RTO/2 (equals to 500ms by default).
>> 
>> While working on this, I got myself wondering how HZ can affect the
>> stack with such small RTO. If we have HZ=250, for example, we probably
>> should be careful when doing calcs such as in mark [a] to not let it
>> tend to 0. This should not be related to the reported issue as
>> syzkaller was using HZ=1000.
>> 
>> (I didn't do any tests, this is only based on code review so far)
>> 
>> 1. https://syzkaller.appspot.com/x/repro.syz?x=1079cf8f800000
>> 2. ad8fec1720e0 ("[SCTP]: Verify all the paths to a peer via heartbeat before using them.")
>> b. https://syzkaller.appspot.com/x/.config?x=f3b4e30da84ec1ed
>> 

^ permalink raw reply

* [PATCH net-next 5/5] net: aquantia: bump driver version
From: Igor Russkikh @ 2018-05-29 12:57 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, David Arcari, Pavel Belous, Igor Russkikh
In-Reply-To: <cover.1527596210.git.igor.russkikh@aquantia.com>

Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
---
 drivers/net/ethernet/aquantia/atlantic/ver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h
index a445de6..94efc64 100644
--- a/drivers/net/ethernet/aquantia/atlantic/ver.h
+++ b/drivers/net/ethernet/aquantia/atlantic/ver.h
@@ -12,8 +12,8 @@
 
 #define NIC_MAJOR_DRIVER_VERSION           2
 #define NIC_MINOR_DRIVER_VERSION           0
-#define NIC_BUILD_DRIVER_VERSION           2
-#define NIC_REVISION_DRIVER_VERSION        1
+#define NIC_BUILD_DRIVER_VERSION           3
+#define NIC_REVISION_DRIVER_VERSION        0
 
 #define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 4/5] net: aquantia: Add renegotiate ethtool operation support
From: Igor Russkikh @ 2018-05-29 12:57 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Pavel Belous, Igor Russkikh, Anton Mikaev
In-Reply-To: <cover.1527596210.git.igor.russkikh@aquantia.com>

From: Anton Mikaev <amikaev@aquantia.com>

Adds ethtool -r|--negotiate operation support. It triggers a special
control bit on the FW interface, causing FW to restart link negotiation.

Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: Anton Mikaev <amikaev@aquantia.com>
---
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c    | 14 +++++++++
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  2 ++
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h        | 35 ++++++++++++++++++++++
 .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c   | 12 ++++++++
 4 files changed, 63 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index c679203..ad6c504 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -285,6 +285,19 @@ static int aq_ethtool_set_coalesce(struct net_device *ndev,
 	return aq_nic_update_interrupt_moderation_settings(aq_nic);
 }
 
+static int aq_ethtool_nway_reset(struct net_device *ndev)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	if (unlikely(!aq_nic->aq_fw_ops->renegotiate))
+		return -EOPNOTSUPP;
+
+	if (netif_running(ndev))
+		return aq_nic->aq_fw_ops->renegotiate(aq_nic->aq_hw);
+
+	return 0;
+}
+
 static void aq_ethtool_get_pauseparam(struct net_device *ndev,
 				      struct ethtool_pauseparam *pause)
 {
@@ -391,6 +404,7 @@ const struct ethtool_ops aq_ethtool_ops = {
 	.get_drvinfo         = aq_ethtool_get_drvinfo,
 	.get_strings         = aq_ethtool_get_strings,
 	.get_rxfh_indir_size = aq_ethtool_get_rss_indir_size,
+	.nway_reset          = aq_ethtool_nway_reset,
 	.get_ringparam       = aq_get_ringparam,
 	.set_ringparam       = aq_set_ringparam,
 	.get_pauseparam      = aq_ethtool_get_pauseparam,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 3aa36d5..1a51152 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -212,6 +212,8 @@ struct aq_fw_ops {
 
 	int (*reset)(struct aq_hw_s *self);
 
+	int (*renegotiate)(struct aq_hw_s *self);
+
 	int (*get_mac_permanent)(struct aq_hw_s *self, u8 *mac);
 
 	int (*set_link_speed)(struct aq_hw_s *self, u32 speed);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index cd8f18f..b875590 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -239,6 +239,41 @@ enum hw_atl_fw2x_caps_hi {
 	CAPS_HI_TRANSACTION_ID,
 };
 
+enum hw_atl_fw2x_ctrl {
+	CTRL_RESERVED1 = 0x00,
+	CTRL_RESERVED2,
+	CTRL_RESERVED3,
+	CTRL_PAUSE,
+	CTRL_ASYMMETRIC_PAUSE,
+	CTRL_RESERVED4,
+	CTRL_RESERVED5,
+	CTRL_RESERVED6,
+	CTRL_1GBASET_FD_EEE,
+	CTRL_2P5GBASET_FD_EEE,
+	CTRL_5GBASET_FD_EEE,
+	CTRL_10GBASET_FD_EEE,
+	CTRL_THERMAL_SHUTDOWN,
+	CTRL_PHY_LOGS,
+	CTRL_EEE_AUTO_DISABLE,
+	CTRL_PFC,
+	CTRL_WAKE_ON_LINK,
+	CTRL_CABLE_DIAG,
+	CTRL_TEMPERATURE,
+	CTRL_DOWNSHIFT,
+	CTRL_PTP_AVB,
+	CTRL_RESERVED7,
+	CTRL_LINK_DROP,
+	CTRL_SLEEP_PROXY,
+	CTRL_WOL,
+	CTRL_MAC_STOP,
+	CTRL_EXT_LOOPBACK,
+	CTRL_INT_LOOPBACK,
+	CTRL_RESERVED8,
+	CTRL_WOL_TIMER,
+	CTRL_STATISTICS,
+	CTRL_FORCE_RECONNECT,
+};
+
 struct aq_hw_s;
 struct aq_fw_ops;
 struct aq_hw_caps_s;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index d2d030a..1935fd6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -215,6 +215,17 @@ static int aq_fw2x_update_stats(struct aq_hw_s *self)
 	return hw_atl_utils_update_stats(self);
 }
 
+static int aq_fw2x_renegotiate(struct aq_hw_s *self)
+{
+	u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+	mpi_opts |= BIT(CTRL_FORCE_RECONNECT);
+
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+	return 0;
+}
+
 static int aq_fw2x_set_flow_control(struct aq_hw_s *self)
 {
 	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
@@ -230,6 +241,7 @@ const struct aq_fw_ops aq_fw_2x_ops = {
 	.init = aq_fw2x_init,
 	.deinit = aq_fw2x_deinit,
 	.reset = NULL,
+	.renegotiate = aq_fw2x_renegotiate,
 	.get_mac_permanent = aq_fw2x_get_mac_permanent,
 	.set_link_speed = aq_fw2x_set_link_speed,
 	.set_state = aq_fw2x_set_state,
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 3/5] net: aquantia: Implement rx/tx flow control ethtools callback
From: Igor Russkikh @ 2018-05-29 12:57 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, David Arcari, Pavel Belous, Igor Russkikh
In-Reply-To: <cover.1527596210.git.igor.russkikh@aquantia.com>

Runtime change of pause frame configuration (rx/tx flow control)
via ethtool.

Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
---
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c    | 42 ++++++++++++++++++++++
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    |  6 +++-
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c        |  1 +
 .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c   | 26 ++++++++++++++
 4 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index bc43d29..c679203 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -285,6 +285,46 @@ static int aq_ethtool_set_coalesce(struct net_device *ndev,
 	return aq_nic_update_interrupt_moderation_settings(aq_nic);
 }
 
+static void aq_ethtool_get_pauseparam(struct net_device *ndev,
+				      struct ethtool_pauseparam *pause)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+	pause->autoneg = 0;
+
+	if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
+		pause->rx_pause = 1;
+	if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
+		pause->tx_pause = 1;
+}
+
+static int aq_ethtool_set_pauseparam(struct net_device *ndev,
+				     struct ethtool_pauseparam *pause)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	int err = 0;
+
+	if (!aq_nic->aq_fw_ops->set_flow_control)
+		return -EOPNOTSUPP;
+
+	if (pause->autoneg == AUTONEG_ENABLE)
+		return -EOPNOTSUPP;
+
+	if (pause->rx_pause)
+		aq_nic->aq_hw->aq_nic_cfg->flow_control |= AQ_NIC_FC_RX;
+	else
+		aq_nic->aq_hw->aq_nic_cfg->flow_control &= ~AQ_NIC_FC_RX;
+
+	if (pause->tx_pause)
+		aq_nic->aq_hw->aq_nic_cfg->flow_control |= AQ_NIC_FC_TX;
+	else
+		aq_nic->aq_hw->aq_nic_cfg->flow_control &= ~AQ_NIC_FC_TX;
+
+	err = aq_nic->aq_fw_ops->set_flow_control(aq_nic->aq_hw);
+
+	return err;
+}
+
 static void aq_get_ringparam(struct net_device *ndev,
 			     struct ethtool_ringparam *ring)
 {
@@ -353,6 +393,8 @@ const struct ethtool_ops aq_ethtool_ops = {
 	.get_rxfh_indir_size = aq_ethtool_get_rss_indir_size,
 	.get_ringparam       = aq_get_ringparam,
 	.set_ringparam       = aq_set_ringparam,
+	.get_pauseparam      = aq_ethtool_get_pauseparam,
+	.set_pauseparam      = aq_ethtool_set_pauseparam,
 	.get_rxfh_key_size   = aq_ethtool_get_rss_key_size,
 	.get_rxfh            = aq_ethtool_get_rss,
 	.get_rxnfc           = aq_ethtool_get_rxnfc,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index bbafa4e..14fa76a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -766,10 +766,14 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     100baseT_Full);
 
-	if (self->aq_nic_cfg.flow_control)
+	if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX)
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Pause);
 
+	if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+
 	if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_FIBRE)
 		ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
 	else
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 9d0a96d..e1feba5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -834,4 +834,5 @@ const struct aq_fw_ops aq_fw_1x_ops = {
 	.set_state = hw_atl_utils_mpi_set_state,
 	.update_link_status = hw_atl_utils_mpi_get_link_status,
 	.update_stats = hw_atl_utils_update_stats,
+	.set_flow_control = NULL,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index a4ac592..d2d030a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -87,6 +87,19 @@ static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed)
 	return 0;
 }
 
+static void aq_fw2x_set_mpi_flow_control(struct aq_hw_s *self, u32 *mpi_state)
+{
+	if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
+		*mpi_state |= BIT(CAPS_HI_PAUSE);
+	else
+		*mpi_state &= ~BIT(CAPS_HI_PAUSE);
+
+	if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
+		*mpi_state |= BIT(CAPS_HI_ASYMMETRIC_PAUSE);
+	else
+		*mpi_state &= ~BIT(CAPS_HI_ASYMMETRIC_PAUSE);
+}
+
 static int aq_fw2x_set_state(struct aq_hw_s *self,
 			     enum hal_atl_utils_fw_state_e state)
 {
@@ -95,6 +108,7 @@ static int aq_fw2x_set_state(struct aq_hw_s *self,
 	switch (state) {
 	case MPI_INIT:
 		mpi_state &= ~BIT(CAPS_HI_LINK_DROP);
+		aq_fw2x_set_mpi_flow_control(self, &mpi_state);
 		break;
 	case MPI_DEINIT:
 		mpi_state |= BIT(CAPS_HI_LINK_DROP);
@@ -201,6 +215,17 @@ static int aq_fw2x_update_stats(struct aq_hw_s *self)
 	return hw_atl_utils_update_stats(self);
 }
 
+static int aq_fw2x_set_flow_control(struct aq_hw_s *self)
+{
+	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+	aq_fw2x_set_mpi_flow_control(self, &mpi_state);
+
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
+
+	return 0;
+}
+
 const struct aq_fw_ops aq_fw_2x_ops = {
 	.init = aq_fw2x_init,
 	.deinit = aq_fw2x_deinit,
@@ -210,4 +235,5 @@ const struct aq_fw_ops aq_fw_2x_ops = {
 	.set_state = aq_fw2x_set_state,
 	.update_link_status = aq_fw2x_update_link_status,
 	.update_stats = aq_fw2x_update_stats,
+	.set_flow_control   = aq_fw2x_set_flow_control,
 };
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 2/5] net: aquantia: Improve adapter init/deinit logic
From: Igor Russkikh @ 2018-05-29 12:56 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, David Arcari, Pavel Belous, Igor Russkikh
In-Reply-To: <cover.1527596210.git.igor.russkikh@aquantia.com>

We now pass link drop status to FW on init/deinit. This is required
to inform FW that the driver took/released control of the link.
FW will then manage its own state and device power profile based
on this information. To improve management we remove the mpi_set
function, which ambiguously took both state and speed parameters.

Deinit callback is now a part of FW ops, as it actually manages the FW.

Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  9 ++--
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    |  2 +-
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  |  1 -
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  1 -
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c        | 53 ++++++++++++----------
 .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c   | 31 ++++++++++++-
 6 files changed, 66 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 904cdfd..3aa36d5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -202,25 +202,28 @@ struct aq_hw_ops {
 
 	int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
 
-	int (*hw_deinit)(struct aq_hw_s *self);
-
 	int (*hw_set_power)(struct aq_hw_s *self, unsigned int power_state);
 };
 
 struct aq_fw_ops {
 	int (*init)(struct aq_hw_s *self);
 
+	int (*deinit)(struct aq_hw_s *self);
+
 	int (*reset)(struct aq_hw_s *self);
 
 	int (*get_mac_permanent)(struct aq_hw_s *self, u8 *mac);
 
 	int (*set_link_speed)(struct aq_hw_s *self, u32 speed);
 
-	int (*set_state)(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state);
+	int (*set_state)(struct aq_hw_s *self,
+			 enum hal_atl_utils_fw_state_e state);
 
 	int (*update_link_status)(struct aq_hw_s *self);
 
 	int (*update_stats)(struct aq_hw_s *self);
+
+	int (*set_flow_control)(struct aq_hw_s *self);
 };
 
 #endif /* AQ_HW_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 05d4e28..bbafa4e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -884,7 +884,7 @@ void aq_nic_deinit(struct aq_nic_s *self)
 		aq_vec_deinit(aq_vec);
 
 	if (self->power_state == AQ_HW_POWER_STATE_D0) {
-		(void)self->aq_hw_ops->hw_deinit(self->aq_hw);
+		(void)self->aq_fw_ops->deinit(self->aq_hw);
 	} else {
 		(void)self->aq_hw_ops->hw_set_power(self->aq_hw,
 						   self->power_state);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 7fd6a7e..ed7fe6f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -877,7 +877,6 @@ static int hw_atl_a0_hw_ring_rx_stop(struct aq_hw_s *self,
 const struct aq_hw_ops hw_atl_ops_a0 = {
 	.hw_set_mac_address   = hw_atl_a0_hw_mac_addr_set,
 	.hw_init              = hw_atl_a0_hw_init,
-	.hw_deinit            = hw_atl_utils_hw_deinit,
 	.hw_set_power         = hw_atl_utils_hw_set_power,
 	.hw_reset             = hw_atl_a0_hw_reset,
 	.hw_start             = hw_atl_a0_hw_start,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 4ea15b9..9dd4f49 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -935,7 +935,6 @@ static int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self,
 const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_set_mac_address   = hw_atl_b0_hw_mac_addr_set,
 	.hw_init              = hw_atl_b0_hw_init,
-	.hw_deinit            = hw_atl_utils_hw_deinit,
 	.hw_set_power         = hw_atl_utils_hw_set_power,
 	.hw_reset             = hw_atl_b0_hw_reset,
 	.hw_start             = hw_atl_b0_hw_start,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index e652d86..9d0a96d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -30,10 +30,11 @@
 #define HW_ATL_MPI_CONTROL_ADR  0x0368U
 #define HW_ATL_MPI_STATE_ADR    0x036CU
 
-#define HW_ATL_MPI_STATE_MSK    0x00FFU
-#define HW_ATL_MPI_STATE_SHIFT  0U
-#define HW_ATL_MPI_SPEED_MSK    0xFFFF0000U
-#define HW_ATL_MPI_SPEED_SHIFT  16U
+#define HW_ATL_MPI_STATE_MSK      0x00FFU
+#define HW_ATL_MPI_STATE_SHIFT    0U
+#define HW_ATL_MPI_SPEED_MSK      0x00FF0000U
+#define HW_ATL_MPI_SPEED_SHIFT    16U
+#define HW_ATL_MPI_DIRTY_WAKE_MSK 0x02000000U
 
 #define HW_ATL_MPI_DAISY_CHAIN_STATUS	0x704
 #define HW_ATL_MPI_BOOT_EXIT_CODE	0x388
@@ -521,23 +522,24 @@ void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
 err_exit:;
 }
 
-static int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
+int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
 {
 	u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
 
-	val = (val & HW_ATL_MPI_STATE_MSK) | (speed << HW_ATL_MPI_SPEED_SHIFT);
+	val = val & ~HW_ATL_MPI_SPEED_MSK;
+	val |= speed << HW_ATL_MPI_SPEED_SHIFT;
 	aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val);
 
 	return 0;
 }
 
-void hw_atl_utils_mpi_set(struct aq_hw_s *self,
-			  enum hal_atl_utils_fw_state_e state,
-			  u32 speed)
+int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
+			       enum hal_atl_utils_fw_state_e state)
 {
 	int err = 0;
 	u32 transaction_id = 0;
 	struct hw_aq_atl_utils_mbox_header mbox;
+	u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
 
 	if (state == MPI_RESET) {
 		hw_atl_utils_mpi_read_mbox(self, &mbox);
@@ -551,21 +553,21 @@ void hw_atl_utils_mpi_set(struct aq_hw_s *self,
 		if (err < 0)
 			goto err_exit;
 	}
+	/* On interface DEINIT we disable DW (raise bit)
+	 * Otherwise enable DW (clear bit)
+	 */
+	if (state == MPI_DEINIT || state == MPI_POWER)
+		val |= HW_ATL_MPI_DIRTY_WAKE_MSK;
+	else
+		val &= ~HW_ATL_MPI_DIRTY_WAKE_MSK;
 
-	aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR,
-			(speed << HW_ATL_MPI_SPEED_SHIFT) | state);
+	/* Set new state bits */
+	val = val & ~HW_ATL_MPI_STATE_MSK;
+	val |= state & HW_ATL_MPI_STATE_MSK;
 
-err_exit:;
-}
-
-static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
-				      enum hal_atl_utils_fw_state_e state)
-{
-	u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
-
-	val = state | (val & HW_ATL_MPI_SPEED_MSK);
 	aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val);
-	return 0;
+err_exit:
+	return err;
 }
 
 int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
@@ -721,16 +723,18 @@ void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p)
 	*p = chip_features;
 }
 
-int hw_atl_utils_hw_deinit(struct aq_hw_s *self)
+static int hw_atl_fw1x_deinit(struct aq_hw_s *self)
 {
-	hw_atl_utils_mpi_set(self, MPI_DEINIT, 0x0U);
+	hw_atl_utils_mpi_set_speed(self, 0);
+	hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
 	return 0;
 }
 
 int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
 			      unsigned int power_state)
 {
-	hw_atl_utils_mpi_set(self, MPI_POWER, 0x0U);
+	hw_atl_utils_mpi_set_speed(self, 0);
+	hw_atl_utils_mpi_set_state(self, MPI_POWER);
 	return 0;
 }
 
@@ -823,6 +827,7 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version)
 
 const struct aq_fw_ops aq_fw_1x_ops = {
 	.init = hw_atl_utils_mpi_create,
+	.deinit = hw_atl_fw1x_deinit,
 	.reset = NULL,
 	.get_mac_permanent = hw_atl_utils_get_mac_permanent,
 	.set_link_speed = hw_atl_utils_mpi_set_speed,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index 8cfce95..a4ac592 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -28,6 +28,10 @@
 #define HW_ATL_FW2X_MPI_STATE_ADDR	0x370
 #define HW_ATL_FW2X_MPI_STATE2_ADDR	0x374
 
+static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed);
+static int aq_fw2x_set_state(struct aq_hw_s *self,
+			     enum hal_atl_utils_fw_state_e state);
+
 static int aq_fw2x_init(struct aq_hw_s *self)
 {
 	int err = 0;
@@ -39,6 +43,16 @@ static int aq_fw2x_init(struct aq_hw_s *self)
 	return err;
 }
 
+static int aq_fw2x_deinit(struct aq_hw_s *self)
+{
+	int err = aq_fw2x_set_link_speed(self, 0);
+
+	if (!err)
+		err = aq_fw2x_set_state(self, MPI_DEINIT);
+
+	return err;
+}
+
 static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed)
 {
 	enum hw_atl_fw2x_rate rate = 0;
@@ -76,7 +90,21 @@ static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed)
 static int aq_fw2x_set_state(struct aq_hw_s *self,
 			     enum hal_atl_utils_fw_state_e state)
 {
-	/* No explicit state in 2x fw */
+	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+	switch (state) {
+	case MPI_INIT:
+		mpi_state &= ~BIT(CAPS_HI_LINK_DROP);
+		break;
+	case MPI_DEINIT:
+		mpi_state |= BIT(CAPS_HI_LINK_DROP);
+		break;
+	case MPI_RESET:
+	case MPI_POWER:
+		/* No actions */
+		break;
+	}
+	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
 	return 0;
 }
 
@@ -175,6 +203,7 @@ static int aq_fw2x_update_stats(struct aq_hw_s *self)
 
 const struct aq_fw_ops aq_fw_2x_ops = {
 	.init = aq_fw2x_init,
+	.deinit = aq_fw2x_deinit,
 	.reset = NULL,
 	.get_mac_permanent = aq_fw2x_get_mac_permanent,
 	.set_link_speed = aq_fw2x_set_link_speed,
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 1/5] net: aquantia: Ethtool based ring size configuration
From: Igor Russkikh @ 2018-05-29 12:56 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Pavel Belous, Igor Russkikh, Anton Mikaev
In-Reply-To: <cover.1527596210.git.igor.russkikh@aquantia.com>

From: Anton Mikaev <amikaev@aquantia.com>

Implemented ring size setup, min/max validation and reconfiguration in
runtime. NIC level lock is used to prevent collisions on parallel
reconfiguration and interference with periodic service timer job.

Signed-off-by: Anton Mikaev <amikaev@aquantia.com>
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
---
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c    | 62 ++++++++++++++++++++++
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  9 +++-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    |  9 +++-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |  2 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  | 46 ++++++++--------
 .../aquantia/atlantic/hw_atl/hw_atl_a0_internal.h  |  8 +++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  | 50 ++++++++---------
 .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h  |  8 +++
 8 files changed, 144 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index f2d8063..bc43d29 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -11,6 +11,7 @@
 
 #include "aq_ethtool.h"
 #include "aq_nic.h"
+#include "aq_vec.h"
 
 static void aq_ethtool_get_regs(struct net_device *ndev,
 				struct ethtool_regs *regs, void *p)
@@ -284,6 +285,65 @@ static int aq_ethtool_set_coalesce(struct net_device *ndev,
 	return aq_nic_update_interrupt_moderation_settings(aq_nic);
 }
 
+static void aq_get_ringparam(struct net_device *ndev,
+			     struct ethtool_ringparam *ring)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(aq_nic);
+
+	ring->rx_pending = aq_nic_cfg->rxds;
+	ring->tx_pending = aq_nic_cfg->txds;
+
+	ring->rx_max_pending = aq_nic_cfg->aq_hw_caps->rxds_max;
+	ring->tx_max_pending = aq_nic_cfg->aq_hw_caps->txds_max;
+}
+
+static int aq_set_ringparam(struct net_device *ndev,
+			    struct ethtool_ringparam *ring)
+{
+	int err = 0;
+	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(aq_nic);
+	const struct aq_hw_caps_s *hw_caps = aq_nic_cfg->aq_hw_caps;
+
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
+		err = -EOPNOTSUPP;
+		goto err_exit;
+	}
+
+	spin_lock(&aq_nic->aq_spinlock);
+
+	if (netif_running(ndev))
+		dev_close(ndev);
+
+	aq_nic_free_vectors(aq_nic);
+
+	aq_nic_cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min);
+	aq_nic_cfg->rxds = min(aq_nic_cfg->rxds, hw_caps->rxds_max);
+	aq_nic_cfg->rxds = ALIGN(aq_nic_cfg->rxds, AQ_HW_RXD_MULTIPLE);
+
+	aq_nic_cfg->txds = max(ring->tx_pending, hw_caps->txds_min);
+	aq_nic_cfg->txds = min(aq_nic_cfg->txds, hw_caps->txds_max);
+	aq_nic_cfg->txds = ALIGN(aq_nic_cfg->txds, AQ_HW_TXD_MULTIPLE);
+
+	for (aq_nic->aq_vecs = 0; aq_nic->aq_vecs < aq_nic_cfg->vecs;
+	     aq_nic->aq_vecs++) {
+		aq_nic->aq_vec[aq_nic->aq_vecs] =
+		    aq_vec_alloc(aq_nic, aq_nic->aq_vecs, aq_nic_cfg);
+		if (unlikely(!aq_nic->aq_vec[aq_nic->aq_vecs])) {
+			err = -ENOMEM;
+			goto err_unlock;
+		}
+	}
+	if (!netif_running(ndev))
+		err = dev_open(ndev);
+
+err_unlock:
+	spin_unlock(&aq_nic->aq_spinlock);
+err_exit:
+	return err;
+}
+
 const struct ethtool_ops aq_ethtool_ops = {
 	.get_link            = aq_ethtool_get_link,
 	.get_regs_len        = aq_ethtool_get_regs_len,
@@ -291,6 +351,8 @@ const struct ethtool_ops aq_ethtool_ops = {
 	.get_drvinfo         = aq_ethtool_get_drvinfo,
 	.get_strings         = aq_ethtool_get_strings,
 	.get_rxfh_indir_size = aq_ethtool_get_rss_indir_size,
+	.get_ringparam       = aq_get_ringparam,
+	.set_ringparam       = aq_set_ringparam,
 	.get_rxfh_key_size   = aq_ethtool_get_rss_key_size,
 	.get_rxfh            = aq_ethtool_get_rss,
 	.get_rxnfc           = aq_ethtool_get_rxnfc,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index a2d416b..904cdfd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -24,8 +24,10 @@ struct aq_hw_caps_s {
 	u64 link_speed_msk;
 	unsigned int hw_priv_flags;
 	u32 media_type;
-	u32 rxds;
-	u32 txds;
+	u32 rxds_max;
+	u32 txds_max;
+	u32 rxds_min;
+	u32 txds_min;
 	u32 txhwb_alignment;
 	u32 irq_mask;
 	u32 vecs;
@@ -98,6 +100,9 @@ struct aq_stats_s {
 #define AQ_HW_MEDIA_TYPE_TP    1U
 #define AQ_HW_MEDIA_TYPE_FIBRE 2U
 
+#define AQ_HW_TXD_MULTIPLE 8U
+#define AQ_HW_RXD_MULTIPLE 8U
+
 struct aq_hw_s {
 	atomic_t flags;
 	u8 rbl_enabled:1;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 1a1a638..05d4e28 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -89,8 +89,8 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 	aq_nic_rss_init(self, cfg->num_rss_queues);
 
 	/*descriptors */
-	cfg->rxds = min(cfg->aq_hw_caps->rxds, AQ_CFG_RXDS_DEF);
-	cfg->txds = min(cfg->aq_hw_caps->txds, AQ_CFG_TXDS_DEF);
+	cfg->rxds = min(cfg->aq_hw_caps->rxds_max, AQ_CFG_RXDS_DEF);
+	cfg->txds = min(cfg->aq_hw_caps->txds_max, AQ_CFG_TXDS_DEF);
 
 	/*rss rings */
 	cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF);
@@ -158,6 +158,8 @@ static void aq_nic_service_timer_cb(struct timer_list *t)
 	int ctimer = AQ_CFG_SERVICE_TIMER_INTERVAL;
 	int err = 0;
 
+	spin_lock(&self->aq_spinlock);
+
 	if (aq_utils_obj_test(&self->flags, AQ_NIC_FLAGS_IS_NOT_READY))
 		goto err_exit;
 
@@ -175,6 +177,7 @@ static void aq_nic_service_timer_cb(struct timer_list *t)
 		ctimer = max(ctimer / 2, 1);
 
 err_exit:
+	spin_unlock(&self->aq_spinlock);
 	mod_timer(&self->service_timer, jiffies + ctimer);
 }
 
@@ -288,6 +291,8 @@ int aq_nic_init(struct aq_nic_s *self)
 		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
 		aq_vec_init(aq_vec, self->aq_hw_ops, self->aq_hw);
 
+	spin_lock_init(&self->aq_spinlock);
+
 	netif_carrier_off(self->ndev);
 
 err_exit:
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index faa533a..aa1cef7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -81,6 +81,8 @@ struct aq_nic_s {
 	struct pci_dev *pdev;
 	unsigned int msix_entry_mask;
 	u32 irqvecs;
+	/* NIC reconfiguration synchronization */
+	spinlock_t aq_spinlock;
 };
 
 static inline struct device *aq_nic_get_dev(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 67e2f9f..7fd6a7e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -19,29 +19,31 @@
 #include "hw_atl_a0_internal.h"
 
 #define DEFAULT_A0_BOARD_BASIC_CAPABILITIES \
-	.is_64_dma = true, \
-	.msix_irqs = 4U, \
-	.irq_mask = ~0U, \
-	.vecs = HW_ATL_A0_RSS_MAX, \
-	.tcs = HW_ATL_A0_TC_MAX, \
-	.rxd_alignment = 1U, \
-	.rxd_size = HW_ATL_A0_RXD_SIZE, \
-	.rxds = 248U, \
-	.txd_alignment = 1U, \
-	.txd_size = HW_ATL_A0_TXD_SIZE, \
-	.txds = 8U * 1024U, \
-	.txhwb_alignment = 4096U, \
-	.tx_rings = HW_ATL_A0_TX_RINGS, \
-	.rx_rings = HW_ATL_A0_RX_RINGS, \
-	.hw_features = NETIF_F_HW_CSUM | \
-			NETIF_F_RXHASH | \
-			NETIF_F_RXCSUM | \
-			NETIF_F_SG | \
-			NETIF_F_TSO, \
+	.is_64_dma = true,		  \
+	.msix_irqs = 4U,		  \
+	.irq_mask = ~0U,		  \
+	.vecs = HW_ATL_A0_RSS_MAX,	  \
+	.tcs = HW_ATL_A0_TC_MAX,	  \
+	.rxd_alignment = 1U,		  \
+	.rxd_size = HW_ATL_A0_RXD_SIZE,   \
+	.rxds_max = HW_ATL_A0_MAX_RXD,    \
+	.rxds_min = HW_ATL_A0_MIN_RXD,    \
+	.txd_alignment = 1U,		  \
+	.txd_size = HW_ATL_A0_TXD_SIZE,   \
+	.txds_max = HW_ATL_A0_MAX_TXD,    \
+	.txds_min = HW_ATL_A0_MIN_RXD,    \
+	.txhwb_alignment = 4096U,	  \
+	.tx_rings = HW_ATL_A0_TX_RINGS,   \
+	.rx_rings = HW_ATL_A0_RX_RINGS,   \
+	.hw_features = NETIF_F_HW_CSUM |  \
+			NETIF_F_RXHASH |  \
+			NETIF_F_RXCSUM |  \
+			NETIF_F_SG |	  \
+			NETIF_F_TSO,	  \
 	.hw_priv_flags = IFF_UNICAST_FLT, \
-	.flow_control = true, \
-	.mtu = HW_ATL_A0_MTU_JUMBO, \
-	.mac_regs_count = 88, \
+	.flow_control = true,		  \
+	.mtu = HW_ATL_A0_MTU_JUMBO,       \
+	.mac_regs_count = 88,		  \
 	.hw_alive_check_addr = 0x10U
 
 const struct aq_hw_caps_s hw_atl_a0_caps_aqc100 = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
index 1d88555..3c94cff 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
@@ -88,4 +88,12 @@
 
 #define HW_ATL_A0_FW_VER_EXPECTED 0x01050006U
 
+#define HW_ATL_A0_MIN_RXD \
+	(ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_RXD_MULTIPLE))
+#define HW_ATL_A0_MIN_TXD \
+	(ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_TXD_MULTIPLE))
+
+#define HW_ATL_A0_MAX_RXD 8184U
+#define HW_ATL_A0_MAX_TXD 8184U
+
 #endif /* HW_ATL_A0_INTERNAL_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 819f6bc..4ea15b9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -20,30 +20,32 @@
 #include "hw_atl_llh_internal.h"
 
 #define DEFAULT_B0_BOARD_BASIC_CAPABILITIES \
-	.is_64_dma = true,	\
-	.msix_irqs = 4U,	\
-	.irq_mask = ~0U,	\
-	.vecs = HW_ATL_B0_RSS_MAX,	\
-	.tcs = HW_ATL_B0_TC_MAX,	\
-	.rxd_alignment = 1U,		\
-	.rxd_size = HW_ATL_B0_RXD_SIZE, \
-	.rxds = 4U * 1024U,		\
-	.txd_alignment = 1U,		\
-	.txd_size = HW_ATL_B0_TXD_SIZE, \
-	.txds = 8U * 1024U,		\
-	.txhwb_alignment = 4096U,	\
-	.tx_rings = HW_ATL_B0_TX_RINGS, \
-	.rx_rings = HW_ATL_B0_RX_RINGS, \
-	.hw_features = NETIF_F_HW_CSUM | \
-			NETIF_F_RXCSUM | \
-			NETIF_F_RXHASH | \
-			NETIF_F_SG |  \
-			NETIF_F_TSO | \
-			NETIF_F_LRO,  \
-	.hw_priv_flags = IFF_UNICAST_FLT,   \
-	.flow_control = true,		\
-	.mtu = HW_ATL_B0_MTU_JUMBO,	\
-	.mac_regs_count = 88,		\
+	.is_64_dma = true,		  \
+	.msix_irqs = 4U,		  \
+	.irq_mask = ~0U,		  \
+	.vecs = HW_ATL_B0_RSS_MAX,	  \
+	.tcs = HW_ATL_B0_TC_MAX,	  \
+	.rxd_alignment = 1U,		  \
+	.rxd_size = HW_ATL_B0_RXD_SIZE,   \
+	.rxds_max = HW_ATL_B0_MAX_RXD,    \
+	.rxds_min = HW_ATL_B0_MIN_RXD,    \
+	.txd_alignment = 1U,		  \
+	.txd_size = HW_ATL_B0_TXD_SIZE,   \
+	.txds_max = HW_ATL_B0_MAX_TXD,    \
+	.txds_min = HW_ATL_B0_MIN_TXD,    \
+	.txhwb_alignment = 4096U,	  \
+	.tx_rings = HW_ATL_B0_TX_RINGS,   \
+	.rx_rings = HW_ATL_B0_RX_RINGS,   \
+	.hw_features = NETIF_F_HW_CSUM |  \
+			NETIF_F_RXCSUM |  \
+			NETIF_F_RXHASH |  \
+			NETIF_F_SG |      \
+			NETIF_F_TSO |     \
+			NETIF_F_LRO,      \
+	.hw_priv_flags = IFF_UNICAST_FLT, \
+	.flow_control = true,		  \
+	.mtu = HW_ATL_B0_MTU_JUMBO,	  \
+	.mac_regs_count = 88,		  \
 	.hw_alive_check_addr = 0x10U
 
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc100 = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index 405d145..28568f5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -142,6 +142,14 @@
 #define HW_ATL_INTR_MODER_MAX  0x1FF
 #define HW_ATL_INTR_MODER_MIN  0xFF
 
+#define HW_ATL_B0_MIN_RXD \
+	(ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_RXD_MULTIPLE))
+#define HW_ATL_B0_MIN_TXD \
+	(ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_TXD_MULTIPLE))
+
+#define HW_ATL_B0_MAX_RXD 8184U
+#define HW_ATL_B0_MAX_TXD 8184U
+
 /* HW layer capabilities */
 
 #endif /* HW_ATL_B0_INTERNAL_H */
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 0/5] net: aquantia: various ethtool ops implementation
From: Igor Russkikh @ 2018-05-29 12:56 UTC (permalink / raw)
  To: David S . Miller; +Cc: netdev, David Arcari, Pavel Belous, Igor Russkikh

In this patchset Anton Mikaev and I added some useful ethtool operations:
- ring size changes
- link renegotiation
- flow control management

The patch also improves init/deinit sequence.

Igor Russkikh (5):
  net: aquantia: Ethtool based ring size configuration
  net: aquantia: Improve adapter init/deinit logic
  net: aquantia: Implement rx/tx flow control ethtools callback
  net: aquantia: Add renegotiate ethtool operation support
  net: aquantia: bump driver version

 .../net/ethernet/aquantia/atlantic/aq_ethtool.c    | 118 +++++++++++++++++++++
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  20 +++-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    |  17 ++-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |   2 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  |  47 ++++----
 .../aquantia/atlantic/hw_atl/hw_atl_a0_internal.h  |   8 ++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  51 ++++-----
 .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h  |   8 ++
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c        |  54 +++++-----
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h        |  35 ++++++
 .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c   |  69 +++++++++++-
 drivers/net/ethernet/aquantia/atlantic/ver.h       |   4 +-
 12 files changed, 349 insertions(+), 84 deletions(-)

-- 
2.7.4

^ permalink raw reply

* Re: [PATCH net-next v3 1/7] net: bridge: Extract boilerplate around switchdev_port_obj_*()
From: Vivien Didelot @ 2018-05-29 12:42 UTC (permalink / raw)
  To: Petr Machata, netdev, devel, bridge
  Cc: f.fainelli, andrew, nikolay, gregkh, idosch, jiri,
	razvan.stefanescu, davem
In-Reply-To: <85401f20b801fa1bae3025bf1df991a9d475fe85.1527519997.git.petrm@mellanox.com>

Hi Petr,

Petr Machata <petrm@mellanox.com> writes:

> A call to switchdev_port_obj_add() or switchdev_port_obj_del() involves
> initializing a struct switchdev_obj_port_vlan, a piece of code that
> repeats on each call site almost verbatim. While in the current codebase
> there is just one duplicated add call, the follow-up patches add more of
> both add and del calls.
>
> Thus to remove the duplication, extract the repetition into named
> functions and reuse.
>
> Signed-off-by: Petr Machata <petrm@mellanox.com>

Considering Dan's comment as well:

Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>

Thanks,

        Vivien

^ permalink raw reply

* Re: [PATCH 1/4 RFCv2] net: phy: realtek: Support RTL8366RB variant
From: Andrew Lunn @ 2018-05-29 12:34 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Vivien Didelot, Florian Fainelli, netdev, openwrt-devel,
	LEDE Development List, Antti Seppälä, Roman Yeryomin,
	Colin Leitner, Gabor Juhos
In-Reply-To: <20180528174752.6806-2-linus.walleij@linaro.org>

On Mon, May 28, 2018 at 07:47:49PM +0200, Linus Walleij wrote:
> The RTL8366RB is an ASIC with five internal PHYs for
> LAN0..LAN3 and WAN. The PHYs are spawn off the main
> device so they can be handled in a distributed manner
> by the Realtek PHY driver. All that is really needed
> is the power save feature enablement and letting the
> PHY driver core pick up the IRQ from the switch chip.
> 
> Cc: Antti Seppälä <a.seppala@gmail.com>
> Cc: Roman Yeryomin <roman@advem.lv>
> Cc: Colin Leitner <colin.leitner@googlemail.com>
> Cc: Gabor Juhos <juhosg@openwrt.org>
> Cc: Florian Fainelli <f.fainelli@gmail.com>
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> ---
> ChangeLog RFCv1->RFCv2:
> - No real changes.
> ---
>  drivers/net/phy/realtek.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
> index 9f48ecf9c627..21624d1c9d38 100644
> --- a/drivers/net/phy/realtek.c
> +++ b/drivers/net/phy/realtek.c
> @@ -37,6 +37,9 @@
>  #define RTL8201F_ISR				0x1e
>  #define RTL8201F_IER				0x13
>  
> +#define RTL8366RB_POWER_SAVE	0x21
> +#define RTL8366RB_POWER_SAVE_ON 0x1000
> +
>  MODULE_DESCRIPTION("Realtek PHY driver");
>  MODULE_AUTHOR("Johnson Leung");
>  MODULE_LICENSE("GPL");
> @@ -145,6 +148,22 @@ static int rtl8211f_config_init(struct phy_device *phydev)
>  	return phy_modify_paged(phydev, 0xd08, 0x11, RTL8211F_TX_DELAY, val);
>  }
>  
> +static int rtl8366rb_config_init(struct phy_device *phydev)
> +{
> +	int ret;
> +	u16 reg;
> +
> +	ret = genphy_config_init(phydev);
> +	if (ret < 0)
> +		return ret;
> +
> +	reg = phy_read(phydev, RTL8366RB_POWER_SAVE);
> +	reg |= RTL8366RB_POWER_SAVE_ON;
> +	phy_write(phydev, RTL8366RB_POWER_SAVE, reg);
> +
> +	return 0;
> +}
> +
>  static struct phy_driver realtek_drvs[] = {
>  	{
>  		.phy_id         = 0x00008201,
> @@ -207,6 +226,18 @@ static struct phy_driver realtek_drvs[] = {
>  		.resume		= genphy_resume,
>  		.read_page	= rtl821x_read_page,
>  		.write_page	= rtl821x_write_page,
> +	}, {
> +		/* The main part of this DSA is in drivers/net/dsa */

Hi Linus

I would not bother with this comment. We don't say, The main part of
this driver is in drivers/net/ethernet/... PHY drivers should be
completely separate to MAC drivers.

Otherwise this looks good.

	Andrew



> +		.phy_id		= 0x001cc961,
> +		.name		= "RTL8366RB Gigabit Ethernet",
> +		.phy_id_mask	= 0x001fffff,
> +		.features	= PHY_GBIT_FEATURES,
> +		.flags		= PHY_HAS_INTERRUPT,
> +		.config_aneg	= &genphy_config_aneg,
> +		.config_init	= &rtl8366rb_config_init,
> +		.read_status	= &genphy_read_status,
> +		.suspend	= genphy_suspend,
> +		.resume		= genphy_resume,
>  	},
>  };
>  
> @@ -218,6 +249,7 @@ static struct mdio_device_id __maybe_unused realtek_tbl[] = {
>  	{ 0x001cc914, 0x001fffff },
>  	{ 0x001cc915, 0x001fffff },
>  	{ 0x001cc916, 0x001fffff },
> +	{ 0x001cc961, 0x001fffff },
>  	{ }
>  };
>  
> -- 
> 2.17.0
> 

^ permalink raw reply

* Re: [PATCH 0/4 RFCv2] Realtek SMI RTL836x DSA driver
From: Andrew Lunn @ 2018-05-29 12:24 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Vivien Didelot, Florian Fainelli, netdev,
	OpenWrt Development List, LEDE Development List
In-Reply-To: <CACRpkdbnO4vhGHTodkMkP+Zgrr71jp2pGuyXCg03YzjK1D1NTw@mail.gmail.com>

On Tue, May 29, 2018 at 10:49:46AM +0200, Linus Walleij wrote:
> On Mon, May 28, 2018 at 8:20 PM, Andrew Lunn <andrew@lunn.ch> wrote:
> > On Mon, May 28, 2018 at 07:47:48PM +0200, Linus Walleij wrote:
> >> This is a second RFC version of the DSA driver for Realtek
> >> RTL8366x especially RTL8366RB.
> >>
> >> I've been beating my head against this one and I'm not really
> >> clear on why my ethernet frames are not coming through to the
> >> CPU port on the chip.
> >>
> >> It appears when using ethtool -S on the ports that packets
> >> are passing fine into the router fabric and through to the
> >> CPU port but the ethernet driver where the fixed link is
> >> connected refuse to accept the packages.
> >
> > Hi Linus
> >
> > Have you played with RGMII delays?
> 
> No not like I changed them or anything... the SoC has some
> set-up for skew and delay on the nanosecond level, but I used the
> vendor defaults, verified to be the same in their custom
> kernel tree.

Hi Linus

Did you look at the switch end? I found a datasheet for the
8366/8369. Register at 0x0050, P8GCR. It has two bits for RGMII
delays.

With RGMII delays, you have 3 'choices'.

1) The hardware design includes the delay, by zig-zagging the clock
line to make it longer.
2) The 'MAC' side does the delay.
3) The 'PHY' side does the delay.

I normally recommend the PHY side doing it, because that's what most
boards do. Gives us some consistency. But it does not really
matter. Just make sure one side, and only one side, is inserting the
delays.

	Andrew

^ permalink raw reply

* Re: [PATCH bpf-next 06/11] bpf: add bpf_skb_cgroup_id helper
From: Quentin Monnet @ 2018-05-29 12:15 UTC (permalink / raw)
  To: Daniel Borkmann, ast; +Cc: netdev
In-Reply-To: <20180528004344.3606-7-daniel@iogearbox.net>

Hi Daniel,

2018-05-28 02:43 UTC+0200 ~ Daniel Borkmann <daniel@iogearbox.net>
> Add a new bpf_skb_cgroup_id() helper that allows to retrieve the
> cgroup id from the skb's socket. This is useful in particular to
> enable bpf_get_cgroup_classid()-like behavior for cgroup v1 in
> cgroup v2 by allowing ID based matching on egress. This can in
> particular be used in combination with applying policy e.g. from
> map lookups, and also complements the older bpf_skb_under_cgroup()
> interface. In user space the cgroup id for a given path can be
> retrieved through the f_handle as demonstrated in [0] recently.
> 
>   [0] https://lkml.org/lkml/2018/5/22/1190
> 
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> Acked-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  include/uapi/linux/bpf.h | 17 ++++++++++++++++-
>  net/core/filter.c        | 29 +++++++++++++++++++++++++++--
>  2 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 9b8c6e3..e2853aa 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2004,6 +2004,20 @@ union bpf_attr {
>   * 		direct packet access.
>   *	Return
>   * 		0 on success, or a negative error in case of failure.
> + *
> + * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
> + * 	Description
> + * 		Return the cgroup v2 id of the socket associated with the *skb*.
> + * 		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
> + * 		helper for cgroup v1 by providing a tag resp. identifier that
> + * 		can be matched on or used for map lookups e.g. to implement
> + * 		policy. The cgroup v2 id of a given path in the hierarchy is
> + * 		exposed in user space through the f_handle API in order to get
> + * 		to the same 64-bit id.
> + *
> + * 		This helper can be used on TC egress path, but not on ingress.

Nitpick: Maybe mention that the kernel must be built with the
CONFIG_SOCK_CGROUP_DATA option for the helper to be available?

Best,
Quentin


> + * 	Return
> + * 		The id is returned or 0 in case the id could not be retrieved.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -2082,7 +2096,8 @@ union bpf_attr {
>  	FN(lwt_push_encap),		\
>  	FN(lwt_seg6_store_bytes),	\
>  	FN(lwt_seg6_adjust_srh),	\
> -	FN(lwt_seg6_action),
> +	FN(lwt_seg6_action),		\
> +	FN(skb_cgroup_id),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index acf1f4f..717c740 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3661,6 +3661,27 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
>  	.arg3_type	= ARG_ANYTHING,
>  };
>  
> +#ifdef CONFIG_SOCK_CGROUP_DATA
> +BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
> +{
> +	struct sock *sk = skb_to_full_sk(skb);
> +	struct cgroup *cgrp;
> +
> +	if (!sk || !sk_fullsock(sk))
> +		return 0;
> +
> +	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> +	return cgrp->kn->id.id;
> +}
> +
> +static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
> +	.func           = bpf_skb_cgroup_id,
> +	.gpl_only       = false,
> +	.ret_type       = RET_INTEGER,
> +	.arg1_type      = ARG_PTR_TO_CTX,
> +};
> +#endif
> +
>  static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
>  				  unsigned long off, unsigned long len)
>  {
> @@ -4741,12 +4762,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  		return &bpf_get_socket_cookie_proto;
>  	case BPF_FUNC_get_socket_uid:
>  		return &bpf_get_socket_uid_proto;
> +	case BPF_FUNC_fib_lookup:
> +		return &bpf_skb_fib_lookup_proto;
>  #ifdef CONFIG_XFRM
>  	case BPF_FUNC_skb_get_xfrm_state:
>  		return &bpf_skb_get_xfrm_state_proto;
>  #endif
> -	case BPF_FUNC_fib_lookup:
> -		return &bpf_skb_fib_lookup_proto;
> +#ifdef CONFIG_SOCK_CGROUP_DATA
> +	case BPF_FUNC_skb_cgroup_id:
> +		return &bpf_skb_cgroup_id_proto;
> +#endif
>  	default:
>  		return bpf_base_func_proto(func_id);
>  	}
> 

^ permalink raw reply

* Re: [PATCH] bpfilter: fix building without CONFIG_INET
From: David Miller @ 2018-05-29 12:14 UTC (permalink / raw)
  To: arnd; +Cc: ast, netdev, linux-kernel
In-Reply-To: <20180529095535.81934-1-arnd@arndb.de>

From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 29 May 2018 11:55:06 +0200

> bpfilter_process_sockopt is a callback that gets called from
> ip_setsockopt() and ip_getsockopt(). However, when CONFIG_INET is
> disabled, it never gets called at all, and assigning a function to the
> callback pointer results in a link failure:
> 
> net/bpfilter/bpfilter_kern.o: In function `__stop_umh':
> bpfilter_kern.c:(.text.unlikely+0x3): undefined reference to `bpfilter_process_sockopt'
> net/bpfilter/bpfilter_kern.o: In function `load_umh':
> bpfilter_kern.c:(.init.text+0x73): undefined reference to `bpfilter_process_sockopt'
> 
> Since there is no caller in this configuration, I assume we can
> simply make the assignment conditional.
> 
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Applied, thanks Arnd.

^ permalink raw reply

* [PATCH net-next] net: davinci_mdio: fix building error without CONFIG_OF
From: YueHaibing @ 2018-05-29 11:56 UTC (permalink / raw)
  To: davem, grygorii.strashko, muvarov
  Cc: netdev, linux-kernel, andrew, fugang.duan, linux-omap, YueHaibing

gcc reports a build error when compiling without CONFIG_OF:
drivers/net/ethernet/ti/davinci_mdio.c: In function ‘davinci_mdio_probe’:
drivers/net/ethernet/ti/davinci_mdio.c:380:9: error: implicit declaration of function ‘davinci_mdio_probe_dt’ [-Werror=implicit-function-declaration]
   ret = davinci_mdio_probe_dt(&data->pdata, pdev);
         ^
Fixes: 9eae9c7d0875 ("drivers: net: davinci_mdio: enable pm runtime auto for ti cpsw-mdio")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 drivers/net/ethernet/ti/davinci_mdio.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 8ac7283..6e544d9 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -339,9 +339,7 @@ static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
 
 	return 0;
 }
-#endif
 
-#if IS_ENABLED(CONFIG_OF)
 static const struct davinci_mdio_of_param of_cpsw_mdio_data = {
 	.autosuspend_delay_ms = 100,
 };
@@ -352,6 +350,12 @@ static const struct of_device_id davinci_mdio_of_mtable[] = {
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, davinci_mdio_of_mtable);
+#else
+static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
+				 struct platform_device *pdev)
+{
+	return -EINVAL;
+}
 #endif
 
 static int davinci_mdio_probe(struct platform_device *pdev)
-- 
2.7.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox