Netdev List
 help / color / mirror / Atom feed
* RE: [PATCH rdma-next v3 08/14] IB/core: Support passing uhw for create_flow
From: Ruhl, Michael J @ 2018-05-31 14:42 UTC (permalink / raw)
  To: Leon Romanovsky, Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Or Gerlitz, Raed Salem, Yishai Hadas, Saeed Mahameed,
	linux-netdev
In-Reply-To: <20180531134341.18441-9-leon@kernel.org>

>-----Original Message-----
>From: Leon Romanovsky [mailto:leon@kernel.org]
>Sent: Thursday, May 31, 2018 9:44 AM
>To: Doug Ledford <dledford@redhat.com>; Jason Gunthorpe
><jgg@mellanox.com>
>Cc: Leon Romanovsky <leonro@mellanox.com>; RDMA mailing list <linux-
>rdma@vger.kernel.org>; Boris Pismenny <borisp@mellanox.com>; Matan
>Barak <matanb@mellanox.com>; Ruhl, Michael J <michael.j.ruhl@intel.com>;
>Or Gerlitz <ogerlitz@mellanox.com>; Raed Salem <raeds@mellanox.com>;
>Yishai Hadas <yishaih@mellanox.com>; Saeed Mahameed
><saeedm@mellanox.com>; linux-netdev <netdev@vger.kernel.org>
>Subject: [PATCH rdma-next v3 08/14] IB/core: Support passing uhw for
>create_flow
>
>From: Matan Barak <matanb@mellanox.com>
>
>This is required when user-space drivers need to pass extra information
>regarding how to handle this flow steering specification.
>
>Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
>Signed-off-by: Matan Barak <matanb@mellanox.com>
>Signed-off-by: Boris Pismenny <borisp@mellanox.com>
>Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>---
> drivers/infiniband/core/uverbs_cmd.c | 7 ++++++-
> drivers/infiniband/core/verbs.c      | 2 +-
> drivers/infiniband/hw/mlx4/main.c    | 6 +++++-
> drivers/infiniband/hw/mlx5/main.c    | 7 ++++++-
> include/rdma/ib_verbs.h              | 3 ++-
> 5 files changed, 20 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/infiniband/core/uverbs_cmd.c
>b/drivers/infiniband/core/uverbs_cmd.c
>index e74262ee104c..ddb9d79691be 100644
>--- a/drivers/infiniband/core/uverbs_cmd.c
>+++ b/drivers/infiniband/core/uverbs_cmd.c
>@@ -3542,11 +3542,16 @@ int ib_uverbs_ex_create_flow(struct
>ib_uverbs_file *file,
> 		err = -EINVAL;
> 		goto err_free;
> 	}
>-	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
>+
>+	flow_id = qp->device->create_flow(qp, flow_attr,
>+					  IB_FLOW_DOMAIN_USER, uhw);
>+

If the create_flow() callback is not defined, won't this cause a problem?

ib_create_flow() checks for the NULL.

Mike


> 	if (IS_ERR(flow_id)) {
> 		err = PTR_ERR(flow_id);
> 		goto err_free;
> 	}
>+	atomic_inc(&qp->usecnt);
>+	flow_id->qp = qp;
> 	flow_id->uobject = uobj;
> 	uobj->object = flow_id;
> 	uflow = container_of(uobj, typeof(*uflow), uobject);
>diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
>index 6ddfb1fade79..0b56828c1319 100644
>--- a/drivers/infiniband/core/verbs.c
>+++ b/drivers/infiniband/core/verbs.c
>@@ -1983,7 +1983,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
> 	if (!qp->device->create_flow)
> 		return ERR_PTR(-EOPNOTSUPP);
>
>-	flow_id = qp->device->create_flow(qp, flow_attr, domain);
>+	flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
> 	if (!IS_ERR(flow_id)) {
> 		atomic_inc(&qp->usecnt);
> 		flow_id->qp = qp;
>diff --git a/drivers/infiniband/hw/mlx4/main.c
>b/drivers/infiniband/hw/mlx4/main.c
>index bf12394c13c1..6fe5d5d1d1d9 100644
>--- a/drivers/infiniband/hw/mlx4/main.c
>+++ b/drivers/infiniband/hw/mlx4/main.c
>@@ -1848,7 +1848,7 @@ static int mlx4_ib_add_dont_trap_rule(struct
>mlx4_dev *dev,
>
> static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
> 				    struct ib_flow_attr *flow_attr,
>-				    int domain)
>+				    int domain, struct ib_udata *udata)
> {
> 	int err = 0, i = 0, j = 0;
> 	struct mlx4_ib_flow *mflow;
>@@ -1866,6 +1866,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct
>ib_qp *qp,
> 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
> 		return ERR_PTR(-EOPNOTSUPP);
>
>+	if (udata &&
>+	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
>+		return ERR_PTR(-EOPNOTSUPP);
>+
> 	memset(type, 0, sizeof(type));
>
> 	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
>diff --git a/drivers/infiniband/hw/mlx5/main.c
>b/drivers/infiniband/hw/mlx5/main.c
>index 92879d2d3026..fb31a719ee25 100644
>--- a/drivers/infiniband/hw/mlx5/main.c
>+++ b/drivers/infiniband/hw/mlx5/main.c
>@@ -3371,7 +3371,8 @@ static struct mlx5_ib_flow_handler
>*create_sniffer_rule(struct mlx5_ib_dev *dev,
>
> static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
> 					   struct ib_flow_attr *flow_attr,
>-					   int domain)
>+					   int domain,
>+					   struct ib_udata *udata)
> {
> 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
> 	struct mlx5_ib_qp *mqp = to_mqp(qp);
>@@ -3383,6 +3384,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct
>ib_qp *qp,
> 	int err;
> 	int underlay_qpn;
>
>+	if (udata &&
>+	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
>+		return ERR_PTR(-EOPNOTSUPP);
>+
> 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
> 		return ERR_PTR(-ENOMEM);
>
>diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>index ba49e874c841..84f412f7b8f3 100644
>--- a/include/rdma/ib_verbs.h
>+++ b/include/rdma/ib_verbs.h
>@@ -2459,7 +2459,8 @@ struct ib_device {
> 	struct ib_flow *	   (*create_flow)(struct ib_qp *qp,
> 						  struct ib_flow_attr
> 						  *flow_attr,
>-						  int domain);
>+						  int domain,
>+						  struct ib_udata *udata);
> 	int			   (*destroy_flow)(struct ib_flow *flow_id);
> 	int			   (*check_mr_status)(struct ib_mr *mr, u32
>check_mask,
> 						      struct ib_mr_status
>*mr_status);
>--
>2.14.3

^ permalink raw reply

* RE: [PATCH rdma-next v3 10/14] IB/uverbs: Add support for flow counters
From: Ruhl, Michael J @ 2018-05-31 14:49 UTC (permalink / raw)
  To: Leon Romanovsky, Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, RDMA mailing list, Boris Pismenny, Matan Barak,
	Or Gerlitz, Raed Salem, Yishai Hadas, Saeed Mahameed,
	linux-netdev
In-Reply-To: <20180531134341.18441-11-leon@kernel.org>

>-----Original Message-----
>From: Leon Romanovsky [mailto:leon@kernel.org]
>Sent: Thursday, May 31, 2018 9:44 AM
>To: Doug Ledford <dledford@redhat.com>; Jason Gunthorpe
><jgg@mellanox.com>
>Cc: Leon Romanovsky <leonro@mellanox.com>; RDMA mailing list <linux-
>rdma@vger.kernel.org>; Boris Pismenny <borisp@mellanox.com>; Matan
>Barak <matanb@mellanox.com>; Ruhl, Michael J <michael.j.ruhl@intel.com>;
>Or Gerlitz <ogerlitz@mellanox.com>; Raed Salem <raeds@mellanox.com>;
>Yishai Hadas <yishaih@mellanox.com>; Saeed Mahameed
><saeedm@mellanox.com>; linux-netdev <netdev@vger.kernel.org>
>Subject: [PATCH rdma-next v3 10/14] IB/uverbs: Add support for flow
>counters
>
>From: Raed Salem <raeds@mellanox.com>
>
>The struct ib_uverbs_flow_spec_action_count associates
>a counters object with the flow.
>
>Post this association the flow counters can be read via
>the counters object.
>
>Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
>Signed-off-by: Raed Salem <raeds@mellanox.com>
>Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>---
> drivers/infiniband/core/uverbs.h     |  1 +
> drivers/infiniband/core/uverbs_cmd.c | 81
>+++++++++++++++++++++++++++++++-----
> include/uapi/rdma/ib_user_verbs.h    | 13 ++++++
> 3 files changed, 84 insertions(+), 11 deletions(-)
>
>diff --git a/drivers/infiniband/core/uverbs.h
>b/drivers/infiniband/core/uverbs.h
>index 5b2461fa634d..c0d40fc3a53a 100644
>--- a/drivers/infiniband/core/uverbs.h
>+++ b/drivers/infiniband/core/uverbs.h
>@@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec {
> 		struct ib_uverbs_flow_spec_action_tag	flow_tag;
> 		struct ib_uverbs_flow_spec_action_drop	drop;
> 		struct ib_uverbs_flow_spec_action_handle action;
>+		struct ib_uverbs_flow_spec_action_count flow_count;
> 	};
> };
>
>diff --git a/drivers/infiniband/core/uverbs_cmd.c
>b/drivers/infiniband/core/uverbs_cmd.c
>index ddb9d79691be..3179a95c6f5e 100644
>--- a/drivers/infiniband/core/uverbs_cmd.c
>+++ b/drivers/infiniband/core/uverbs_cmd.c
>@@ -2748,43 +2748,82 @@ ssize_t ib_uverbs_detach_mcast(struct
>ib_uverbs_file *file,
> struct ib_uflow_resources {
> 	size_t			max;
> 	size_t			num;
>-	struct ib_flow_action	*collection[0];
>+	size_t			collection_num;
>+	size_t			counters_num;
>+	struct ib_counters	**counters;
>+	struct ib_flow_action	**collection;
> };
>
> static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
> {
> 	struct ib_uflow_resources *resources;
>
>-	resources =
>-		kmalloc(sizeof(*resources) +
>-			num_specs * sizeof(*resources->collection),
>GFP_KERNEL);
>+	resources = kzalloc(sizeof(*resources), GFP_KERNEL);
>
> 	if (!resources)
>-		return NULL;
>+		goto err_res;

Why the new goto?

>+
>+	resources->counters =
>+		kcalloc(num_specs, sizeof(*resources->counters),
>GFP_KERNEL);
>+
>+	if (!resources->counters)
>+		goto err_cnt;

kcalloc() zeros stuff.  Could you just have a single common goto for the
cleanup?

Mike

>+
>+	resources->collection =
>+		kcalloc(num_specs, sizeof(*resources->collection),
>GFP_KERNEL);
>+
>+	if (!resources->collection)
>+		goto err_collection;
>
>-	resources->num = 0;
> 	resources->max = num_specs;
>
> 	return resources;
>+
>+err_collection:
>+	kfree(resources->counters);
>+err_cnt:
>+	kfree(resources);
>+err_res:
>+	return NULL;
> }
>
> void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
> {
> 	unsigned int i;
>
>-	for (i = 0; i < uflow_res->num; i++)
>+	for (i = 0; i < uflow_res->collection_num; i++)
> 		atomic_dec(&uflow_res->collection[i]->usecnt);
>
>+	for (i = 0; i < uflow_res->counters_num; i++)
>+		atomic_dec(&uflow_res->counters[i]->usecnt);
>+
>+	kfree(uflow_res->collection);
>+	kfree(uflow_res->counters);
> 	kfree(uflow_res);
> }
>
> static void flow_resources_add(struct ib_uflow_resources *uflow_res,
>-			       struct ib_flow_action *action)
>+			       enum ib_flow_spec_type type,
>+			       void *ibobj)
> {
> 	WARN_ON(uflow_res->num >= uflow_res->max);
>
>-	atomic_inc(&action->usecnt);
>-	uflow_res->collection[uflow_res->num++] = action;
>+	switch (type) {
>+	case IB_FLOW_SPEC_ACTION_HANDLE:
>+		atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
>+		uflow_res->collection[uflow_res->collection_num++] =
>+			(struct ib_flow_action *)ibobj;
>+		break;
>+	case IB_FLOW_SPEC_ACTION_COUNT:
>+		atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
>+		uflow_res->counters[uflow_res->counters_num++] =
>+			(struct ib_counters *)ibobj;
>+		break;
>+	default:
>+		WARN_ON(1);
>+	}
>+
>+	uflow_res->num++;
> }
>
> static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
>@@ -2821,9 +2860,29 @@ static int kern_spec_to_ib_spec_action(struct
>ib_ucontext *ucontext,
> 			return -EINVAL;
> 		ib_spec->action.size =
> 			sizeof(struct ib_flow_spec_action_handle);
>-		flow_resources_add(uflow_res, ib_spec->action.act);
>+		flow_resources_add(uflow_res,
>+				   IB_FLOW_SPEC_ACTION_HANDLE,
>+				   ib_spec->action.act);
> 		uobj_put_obj_read(ib_spec->action.act);
> 		break;
>+	case IB_FLOW_SPEC_ACTION_COUNT:
>+		if (kern_spec->flow_count.size !=
>+			sizeof(struct ib_uverbs_flow_spec_action_count))
>+			return -EINVAL;
>+		ib_spec->flow_count.counters =
>+			uobj_get_obj_read(counters,
>+					  UVERBS_OBJECT_COUNTERS,
>+					  kern_spec->flow_count.handle,
>+					  ucontext);
>+		if (!ib_spec->flow_count.counters)
>+			return -EINVAL;
>+		ib_spec->flow_count.size =
>+				sizeof(struct ib_flow_spec_action_count);
>+		flow_resources_add(uflow_res,
>+				   IB_FLOW_SPEC_ACTION_COUNT,
>+				   ib_spec->flow_count.counters);
>+		uobj_put_obj_read(ib_spec->flow_count.counters);
>+		break;
> 	default:
> 		return -EINVAL;
> 	}
>diff --git a/include/uapi/rdma/ib_user_verbs.h
>b/include/uapi/rdma/ib_user_verbs.h
>index 409507f83b91..4f9991de8e3a 100644
>--- a/include/uapi/rdma/ib_user_verbs.h
>+++ b/include/uapi/rdma/ib_user_verbs.h
>@@ -998,6 +998,19 @@ struct ib_uverbs_flow_spec_action_handle {
> 	__u32			      reserved1;
> };
>
>+struct ib_uverbs_flow_spec_action_count {
>+	union {
>+		struct ib_uverbs_flow_spec_hdr hdr;
>+		struct {
>+			__u32 type;
>+			__u16 size;
>+			__u16 reserved;
>+		};
>+	};
>+	__u32			      handle;
>+	__u32			      reserved1;
>+};
>+
> struct ib_uverbs_flow_tunnel_filter {
> 	__be32 tunnel_id;
> };
>--
>2.14.3

^ permalink raw reply

* [PATCH net-next 1/2] qed: Fix shared memory inconsistency between driver and the MFW.
From: Sudarsana Reddy Kalluru @ 2018-05-31 15:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, Ariel.Elior, Michal.Kalderon
In-Reply-To: <20180531150553.24334-1-sudarsana.kalluru@cavium.com>

The definitions of the structure shared between the driver and the
management firmware (MFW) differ in size. The additional field defined by
the MFW is not relevant to the current driver. Add a dummy field to the
structure.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 8e1e6e1..beba930 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -11996,6 +11996,7 @@ struct public_port {
 #define EEE_REMOTE_TW_RX_MASK   0xffff0000
 #define EEE_REMOTE_TW_RX_OFFSET 16
 
+	u32 reserved1;
 	u32 oem_cfg_port;
 #define OEM_CFG_CHANNEL_TYPE_MASK                       0x00000003
 #define OEM_CFG_CHANNEL_TYPE_OFFSET                     0
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 0/2] qed: Fix issues in UFP feature commit 'cac6f691'.
From: Sudarsana Reddy Kalluru @ 2018-05-31 15:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, Ariel.Elior, Michal.Kalderon, Sudarsana Reddy Kalluru

From: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>

This patch series fixes a couple of issues in the UFP feature commit,
   cac6f691: Add support for Unified Fabric Port.

Please consider applying it to "net-next".

Sudarsana Reddy Kalluru (2):
  qed: Fix shared memory inconsistency between driver and the MFW.
  qed: Fix use of incorrect shmem address.

 drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 +
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

-- 
1.8.3.1

^ permalink raw reply

* [PATCH net-next 2/2] qed: Fix use of incorrect shmem address.
From: Sudarsana Reddy Kalluru @ 2018-05-31 15:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, Ariel.Elior, Michal.Kalderon
In-Reply-To: <20180531150553.24334-1-sudarsana.kalluru@cavium.com>

An incorrect shared memory address is used while deriving the values
for tc and pri_type. Use the shmem address corresponding to 'oem_cfg_func',
where the management firmware saves the tc/pri_type values.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 2612e3e..6f9927d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1514,9 +1514,10 @@ void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	}
 
 	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
-	val = (port_cfg & OEM_CFG_FUNC_TC_MASK) >> OEM_CFG_FUNC_TC_OFFSET;
+	val = (shmem_info.oem_cfg_func & OEM_CFG_FUNC_TC_MASK) >>
+		OEM_CFG_FUNC_TC_OFFSET;
 	p_hwfn->ufp_info.tc = (u8)val;
-	val = (port_cfg & OEM_CFG_FUNC_HOST_PRI_CTRL_MASK) >>
+	val = (shmem_info.oem_cfg_func & OEM_CFG_FUNC_HOST_PRI_CTRL_MASK) >>
 		OEM_CFG_FUNC_HOST_PRI_CTRL_OFFSET;
 	if (val == OEM_CFG_FUNC_HOST_PRI_CTRL_VNIC) {
 		p_hwfn->ufp_info.pri_type = QED_UFP_PRI_VNIC;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH rdma-next v3 08/14] IB/core: Support passing uhw for create_flow
From: Yishai Hadas @ 2018-05-31 15:09 UTC (permalink / raw)
  To: Ruhl, Michael J
  Cc: Leon Romanovsky, Doug Ledford, Jason Gunthorpe, Leon Romanovsky,
	RDMA mailing list, Boris Pismenny, Matan Barak, Or Gerlitz,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <14063C7AD467DE4B82DEDB5C278E8663B38F0635@FMSMSX108.amr.corp.intel.com>

On 5/31/2018 5:42 PM, Ruhl, Michael J wrote:
>> -----Original Message-----
>> From: Leon Romanovsky [mailto:leon@kernel.org]
>> Sent: Thursday, May 31, 2018 9:44 AM
>> To: Doug Ledford <dledford@redhat.com>; Jason Gunthorpe
>> <jgg@mellanox.com>
>> Cc: Leon Romanovsky <leonro@mellanox.com>; RDMA mailing list <linux-
>> rdma@vger.kernel.org>; Boris Pismenny <borisp@mellanox.com>; Matan
>> Barak <matanb@mellanox.com>; Ruhl, Michael J <michael.j.ruhl@intel.com>;
>> Or Gerlitz <ogerlitz@mellanox.com>; Raed Salem <raeds@mellanox.com>;
>> Yishai Hadas <yishaih@mellanox.com>; Saeed Mahameed
>> <saeedm@mellanox.com>; linux-netdev <netdev@vger.kernel.org>
>> Subject: [PATCH rdma-next v3 08/14] IB/core: Support passing uhw for
>> create_flow
>>
>> From: Matan Barak <matanb@mellanox.com>
>>
>> This is required when user-space drivers need to pass extra information
>> regarding how to handle this flow steering specification.
>>
>> Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
>> Signed-off-by: Matan Barak <matanb@mellanox.com>
>> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
>> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>> ---
>> drivers/infiniband/core/uverbs_cmd.c | 7 ++++++-
>> drivers/infiniband/core/verbs.c      | 2 +-
>> drivers/infiniband/hw/mlx4/main.c    | 6 +++++-
>> drivers/infiniband/hw/mlx5/main.c    | 7 ++++++-
>> include/rdma/ib_verbs.h              | 3 ++-
>> 5 files changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/uverbs_cmd.c
>> b/drivers/infiniband/core/uverbs_cmd.c
>> index e74262ee104c..ddb9d79691be 100644
>> --- a/drivers/infiniband/core/uverbs_cmd.c
>> +++ b/drivers/infiniband/core/uverbs_cmd.c
>> @@ -3542,11 +3542,16 @@ int ib_uverbs_ex_create_flow(struct
>> ib_uverbs_file *file,
>> 		err = -EINVAL;
>> 		goto err_free;
>> 	}
>> -	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
>> +
>> +	flow_id = qp->device->create_flow(qp, flow_attr,
>> +					  IB_FLOW_DOMAIN_USER, uhw);
>> +
> 
> If the create_flow() callback is not defined, won't this cause a problem?
> 
> ib_create_flow() checks for the NULL.
> 

We are fine here.

This function is called only if the device turned on the command mask 
(i.e. IB_USER_VERBS_EX_CMD_CREATE_FLOW) which comes together with its 
callback.

see:
https://elixir.bootlin.com/linux/latest/source/drivers/infiniband/core/uverbs_main.c#L709

> 
> 
>> 	if (IS_ERR(flow_id)) {
>> 		err = PTR_ERR(flow_id);
>> 		goto err_free;
>> 	}
>> +	atomic_inc(&qp->usecnt);
>> +	flow_id->qp = qp;
>> 	flow_id->uobject = uobj;
>> 	uobj->object = flow_id;
>> 	uflow = container_of(uobj, typeof(*uflow), uobject);
>> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
>> index 6ddfb1fade79..0b56828c1319 100644
>> --- a/drivers/infiniband/core/verbs.c
>> +++ b/drivers/infiniband/core/verbs.c
>> @@ -1983,7 +1983,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
>> 	if (!qp->device->create_flow)
>> 		return ERR_PTR(-EOPNOTSUPP);
>>
>> -	flow_id = qp->device->create_flow(qp, flow_attr, domain);
>> +	flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
>> 	if (!IS_ERR(flow_id)) {
>> 		atomic_inc(&qp->usecnt);
>> 		flow_id->qp = qp;
>> diff --git a/drivers/infiniband/hw/mlx4/main.c
>> b/drivers/infiniband/hw/mlx4/main.c
>> index bf12394c13c1..6fe5d5d1d1d9 100644
>> --- a/drivers/infiniband/hw/mlx4/main.c
>> +++ b/drivers/infiniband/hw/mlx4/main.c
>> @@ -1848,7 +1848,7 @@ static int mlx4_ib_add_dont_trap_rule(struct
>> mlx4_dev *dev,
>>
>> static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
>> 				    struct ib_flow_attr *flow_attr,
>> -				    int domain)
>> +				    int domain, struct ib_udata *udata)
>> {
>> 	int err = 0, i = 0, j = 0;
>> 	struct mlx4_ib_flow *mflow;
>> @@ -1866,6 +1866,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct
>> ib_qp *qp,
>> 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
>> 		return ERR_PTR(-EOPNOTSUPP);
>>
>> +	if (udata &&
>> +	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
>> +		return ERR_PTR(-EOPNOTSUPP);
>> +
>> 	memset(type, 0, sizeof(type));
>>
>> 	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
>> diff --git a/drivers/infiniband/hw/mlx5/main.c
>> b/drivers/infiniband/hw/mlx5/main.c
>> index 92879d2d3026..fb31a719ee25 100644
>> --- a/drivers/infiniband/hw/mlx5/main.c
>> +++ b/drivers/infiniband/hw/mlx5/main.c
>> @@ -3371,7 +3371,8 @@ static struct mlx5_ib_flow_handler
>> *create_sniffer_rule(struct mlx5_ib_dev *dev,
>>
>> static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
>> 					   struct ib_flow_attr *flow_attr,
>> -					   int domain)
>> +					   int domain,
>> +					   struct ib_udata *udata)
>> {
>> 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
>> 	struct mlx5_ib_qp *mqp = to_mqp(qp);
>> @@ -3383,6 +3384,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct
>> ib_qp *qp,
>> 	int err;
>> 	int underlay_qpn;
>>
>> +	if (udata &&
>> +	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
>> +		return ERR_PTR(-EOPNOTSUPP);
>> +
>> 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
>> 		return ERR_PTR(-ENOMEM);
>>
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index ba49e874c841..84f412f7b8f3 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -2459,7 +2459,8 @@ struct ib_device {
>> 	struct ib_flow *	   (*create_flow)(struct ib_qp *qp,
>> 						  struct ib_flow_attr
>> 						  *flow_attr,
>> -						  int domain);
>> +						  int domain,
>> +						  struct ib_udata *udata);
>> 	int			   (*destroy_flow)(struct ib_flow *flow_id);
>> 	int			   (*check_mr_status)(struct ib_mr *mr, u32
>> check_mask,
>> 						      struct ib_mr_status
>> *mr_status);
>> --
>> 2.14.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [PATCH 0/4] RFC CPSW switchdev mode
From: Ilias Apalodimas @ 2018-05-31 15:27 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Ivan Vecera, Jiri Pirko, netdev, grygorii.strashko,
	ivan.khoronzhuk, nsekhar, francois.ozog, yogeshs, spatton
In-Reply-To: <20180525120938.GB20060@lunn.ch>

Sorry for the late response; I had some time to take another look and do some
extra testing.

> switchdev is about offloading what Linux can do to hardware to
> accelerate it. The switch is a block of accelerator hardware, like a
> GPU is for accelerating graphics. Linux can render OpenGL, but it is
> better to hand it over to the GPU accelerator.
>
> Same applies here. The Linux bridge can bridge multicast. Using the
> switchdev API, you can push that down to the accelerator, and let it
> do it.
>
> So you need to think about, how do you make the Linux bridge not pass
> multicast traffic to the host stack. Then how do you extend the
> switchdev API so you can push this down to the accelerator.
>

> To really get switchdev, you often need to pivot your point of view a
> bit. People often think, switchdev is about writing drivers for
> switches. Its not, its about how you offload networking which Linux
> can do down to a switch. And if the switch cannot accelerate it, you
> leave Linux to do it.
>
> When you get in the details, i think you will find the switchdev API
> actually already has what you need for this use case. What you need to
> figure out is how you make the Linux bridge not pass multicast to the
> host. Well, actually, not pass multicast it has not asked for. Then
> accelerate it.
The current driver is already working like that. The difference between the
modes of operation is this:
By registering the 'cpu port' we choose whether the Linux host is going to see
the br_ip4_multicast_igmp3_report or br_multicast_ipv4_rcv (by configuring the
VLANs it participates in) and trigger switchdev to add the MDBs.
If the cpu port is a member of that VLAN, then the dynamic entry shows up in the
'bridge mdb show' output, i.e. dev br0 port sw0p1 grp 239.1.1.1 temp offload vid 100.
If not, the user is able to add it manually.

Anyway, I got the main points of the RFC; if Petr's patch gets accepted I might
be able to respin this without registering a CPU port.

Regards
Ilias

^ permalink raw reply

* Re: [PATCH iproute2] ip: IFLA_NEW_NETNSID/IFLA_NEW_IFINDEX support
From: Stephen Hemminger @ 2018-05-31 15:46 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: shemminger, netdev
In-Reply-To: <20180531142848.377-1-nicolas.dichtel@6wind.com>

On Thu, 31 May 2018 16:28:48 +0200
Nicolas Dichtel <nicolas.dichtel@6wind.com> wrote:

> Parse and display those attributes.
> Example:
> ip l a type dummy
> ip netns add foo
> ip monitor link&
> ip l s dummy1 netns foo
> Deleted 6: dummy1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
>     link/ether 66:af:3a:3f:a0:89 brd ff:ff:ff:ff:ff:ff new-nsid 0 new-ifindex 6
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> ---
>  ip/ipaddress.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/ip/ipaddress.c b/ip/ipaddress.c
> index 00da14c6f97c..c7c7e7df4e81 100644
> --- a/ip/ipaddress.c
> +++ b/ip/ipaddress.c
> @@ -964,6 +964,17 @@ int print_linkinfo(const struct sockaddr_nl *who,
>  		}
>  	}
>  
> +	if (tb[IFLA_NEW_NETNSID]) {
> +		int id = rta_getattr_u32(tb[IFLA_NEW_NETNSID]);
> +
> +		print_int(PRINT_FP, NULL, " new-nsid %d", id);
> +	}
> +	if (tb[IFLA_NEW_IFINDEX]) {
> +		int id = rta_getattr_u32(tb[IFLA_NEW_IFINDEX]);
> +
> +		print_int(PRINT_FP, NULL, " new-ifindex %d", id);
> +	}
> +
>  	if (tb[IFLA_PROTO_DOWN]) {
>  		if (rta_getattr_u8(tb[IFLA_PROTO_DOWN]))
>  			print_bool(PRINT_ANY,

This makes sense. All of linkinfo that is present should be displayed.

Both netns and ifindex are really unsigned values. Use __u32 and print_uint.
Also why not convert numeric values to names?

^ permalink raw reply

* Re: [PATCH iproute2] ip: IFLA_NEW_NETNSID/IFLA_NEW_IFINDEX support
From: Nicolas Dichtel @ 2018-05-31 15:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <20180531114615.3f10766f@shemminger-XPS-13-9360>

Le 31/05/2018 à 17:46, Stephen Hemminger a écrit :
> On Thu, 31 May 2018 16:28:48 +0200
[snip]
> This makes sense. All of linkinfo that is present should be displayed.
> 
> Both netns and ifindex are really unsigned values. Use __u32 and print_uint.
Ok.

> Also why not convert numeric values to names?
The only case where the ifname can change is when a netns is deleted and the
interface is put back in init_net. But at this stage, we don't know the new name.

For the nsid, you're right, it will be better to display the netns name. If you
agree, I will do this in a following patch, thus all places using nsid can be
converted at the same time.


Regards,
Nicolas

^ permalink raw reply

* Re: [PATCH net-next v2 0/2] net: phy: improve PHY suspend/resume
From: Heiner Kallweit @ 2018-05-31 15:58 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <20180530203512.GA16286@lunn.ch>

On 30.05.2018 22:35, Andrew Lunn wrote:
>> I think we need a better solution than spending the effort needed
>> to make the MDIO ops runtime-pm-aware. In general there seems to be
>> just one network driver using both phylib and runtime pm, so most
>> drivers aren't affected (yet).
>>
>> I will spend few more thoughts on a solution ..
> 
> Hi Heiner
> 
> Please keep in mind that MDIO is a generic bus. Many Ethernet switches
> are connected via MDIO. Some of those switches have MDIO busses of
> their own. Also, some Broadcom devices have USB-PHYs controlled over
> MDIO, etc.
> 
> So you need a generic solution here.
> 
>    Andrew
> 
The following proposed change (I combined three patches here) is quite
small, generic, and solves my problem. Another advantage is that it
doesn't impact existing code / drivers.
We just would have to see whether Rafael likes the idea of adding this
flag to the PM core.

Other bus subsystems would be free to adopt the same mechanism with
minimal effort.

Alternatively we could just add a flag to struct mii_bus and not touch
the PM core. But then the solution would be much less generic.

By the way: The problem is related to an experimental patch series for
splitting r8169/r8168 drivers and switching r8168 to phylib.
Therefore the change to r8168.c won't apply to existing kernel code.

Heiner

---
 drivers/net/ethernet/realtek/r8168.c | 1 +
 drivers/net/phy/phy_device.c         | 9 ++++++++-
 include/linux/pm.h                   | 6 ++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8168.c b/drivers/net/ethernet/realtek/r8168.c
index 473a147e..2e1af844 100644
--- a/drivers/net/ethernet/realtek/r8168.c
+++ b/drivers/net/ethernet/realtek/r8168.c
@@ -5063,6 +5063,7 @@ static int r8168_mdio_register(struct rtl8169_private *tp)
        new_bus->irq[0] = PHY_IGNORE_INTERRUPT;
        snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8168-%x",
                 PCI_DEVID(pdev->bus->number, pdev->devfn));
+       dev_pm_set_driver_flags(&new_bus->dev, DPM_FLAG_IGNORE_PM);

        new_bus->read = r8168_mdio_read_reg;
        new_bus->write = r8168_mdio_write_reg;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 9e4ba8e8..459fd677 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -76,13 +76,20 @@ static LIST_HEAD(phy_fixup_list);
 static DEFINE_MUTEX(phy_fixup_lock);

 #ifdef CONFIG_PM
+static bool mdio_bus_ignore_pm(struct phy_device *phydev)
+{
+       struct mii_bus *bus = phydev->mdio.bus;
+
+       return dev_pm_test_driver_flags(&bus->dev, DPM_FLAG_IGNORE_PM);
+}
+
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
        struct device_driver *drv = phydev->mdio.dev.driver;
        struct phy_driver *phydrv = to_phy_driver(drv);
        struct net_device *netdev = phydev->attached_dev;

-       if (!drv || !phydrv->suspend)
+       if (!drv || !phydrv->suspend || mdio_bus_ignore_pm(phydev))
                return false;

        /* PHY not attached? May suspend if the PHY has not already been
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e723b78d..922d2ded 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -560,6 +560,7 @@ struct pm_subsys_data {
  * SMART_PREPARE: Check the return value of the driver's ->prepare callback.
  * SMART_SUSPEND: No need to resume the device from runtime suspend.
  * LEAVE_SUSPENDED: Avoid resuming the device during system resume if possible.
+ * IGNORE_PM: Skip suspend/resume because the parent takes care.
  *
  * Setting SMART_PREPARE instructs bus types and PM domains which may want
  * system suspend/resume callbacks to be skipped for the device to return 0 from
@@ -576,11 +577,16 @@ struct pm_subsys_data {
  *
  * Setting LEAVE_SUSPENDED informs the PM core and middle-layer code that the
  * driver prefers the device to be left in suspend after system resume.
+ *
+ * Setting DPM_FLAG_IGNORE_PM instructs middle-layer code to skip suspending /
+ * resuming devices. This is meant for cases where the parent of a bus handles
+ * PM of the devices attached to the bus.
  */
 #define DPM_FLAG_NEVER_SKIP            BIT(0)
 #define DPM_FLAG_SMART_PREPARE         BIT(1)
 #define DPM_FLAG_SMART_SUSPEND         BIT(2)
 #define DPM_FLAG_LEAVE_SUSPENDED       BIT(3)
+#define DPM_FLAG_IGNORE_PM             BIT(4)

 struct dev_pm_info {
        pm_message_t            power_state;

^ permalink raw reply related

* Re: [bpf-next V2 PATCH 8/8] bpf/xdp: devmap can avoid calling ndo_xdp_flush
From: Song Liu @ 2018-05-31 16:06 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Networking, Daniel Borkmann, Alexei Starovoitov, Song Liu,
	John Fastabend
In-Reply-To: <152775722322.24817.6090081993515109790.stgit@firesoul>

On Thu, May 31, 2018 at 2:00 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> The XDP_REDIRECT map devmap can avoid using ndo_xdp_flush, by instead
> instructing ndo_xdp_xmit to flush via XDP_XMIT_FLUSH flag in
> appropriate places.
>
> Notice after this patch it is possible to remove ndo_xdp_flush
> completely, as this is the last user of ndo_xdp_flush. This is left
> for later patches, to keep driver changes separate.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  kernel/bpf/devmap.c |   19 ++++++-------------
>  1 file changed, 6 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index 04fbd75a5274..993b2dd07661 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -217,7 +217,7 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
>  }
>
>  static int bq_xmit_all(struct bpf_dtab_netdev *obj,
> -                        struct xdp_bulk_queue *bq)
> +                      struct xdp_bulk_queue *bq, u32 flags)
>  {
>         struct net_device *dev = obj->dev;
>         int sent = 0, drops = 0, err = 0;
> @@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
>                 prefetch(xdpf);
>         }
>
> -       sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, 0);
> +       sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
>         if (sent < 0) {
>                 err = sent;
>                 sent = 0;
> @@ -276,7 +276,6 @@ void __dev_map_flush(struct bpf_map *map)
>         for_each_set_bit(bit, bitmap, map->max_entries) {
>                 struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
>                 struct xdp_bulk_queue *bq;
> -               struct net_device *netdev;
>
>                 /* This is possible if the dev entry is removed by user space
>                  * between xdp redirect and flush op.
> @@ -287,10 +286,7 @@ void __dev_map_flush(struct bpf_map *map)
>                 __clear_bit(bit, bitmap);
>
>                 bq = this_cpu_ptr(dev->bulkq);
> -               bq_xmit_all(dev, bq);
> -               netdev = dev->dev;
> -               if (likely(netdev->netdev_ops->ndo_xdp_flush))
> -                       netdev->netdev_ops->ndo_xdp_flush(netdev);
> +               bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
>         }
>  }
>
> @@ -320,7 +316,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
>         struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
>
>         if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
> -               bq_xmit_all(obj, bq);
> +               bq_xmit_all(obj, bq, 0);
>
>         /* Ingress dev_rx will be the same for all xdp_frame's in
>          * bulk_queue, because bq stored per-CPU and must be flushed
> @@ -359,8 +355,7 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
>
>  static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
>  {
> -       if (dev->dev->netdev_ops->ndo_xdp_flush) {
> -               struct net_device *fl = dev->dev;
> +       if (dev->dev->netdev_ops->ndo_xdp_xmit) {
>                 struct xdp_bulk_queue *bq;
>                 unsigned long *bitmap;
>
> @@ -371,9 +366,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
>                         __clear_bit(dev->bit, bitmap);
>
>                         bq = per_cpu_ptr(dev->bulkq, cpu);
> -                       bq_xmit_all(dev, bq);
> -
> -                       fl->netdev_ops->ndo_xdp_flush(dev->dev);
> +                       bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
>                 }
>         }
>  }
>

^ permalink raw reply

* Re: [bpf-next V2 PATCH 3/8] ixgbe: implement flush flag for ndo_xdp_xmit
From: Song Liu @ 2018-05-31 16:14 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Networking, Daniel Borkmann, Alexei Starovoitov, Song Liu,
	John Fastabend
In-Reply-To: <152775719796.24817.11035788244128769860.stgit@firesoul>

On Thu, May 31, 2018 at 1:59 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> When passed the XDP_XMIT_FLUSH flag ixgbe_xdp_xmit now performs the
> same kind of ring tail update as in ixgbe_xdp_flush.  The update tail
> code in ixgbe_xdp_flush is generalized and shared with ixgbe_xdp_xmit.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   20 ++++++++++++++------
>  1 file changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index 87f088f4af52..4fd77c9067f2 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -10022,6 +10022,15 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
>         }
>  }
>
> +static void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
> +{
> +       /* Force memory writes to complete before letting h/w know there
> +        * are new descriptors to fetch.
> +        */
> +       wmb();
> +       writel(ring->next_to_use, ring->tail);
> +}
> +
>  static int ixgbe_xdp_xmit(struct net_device *dev, int n,
>                           struct xdp_frame **frames, u32 flags)
>  {
> @@ -10033,7 +10042,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
>         if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
>                 return -ENETDOWN;
>
> -       if (unlikely(flags & ~XDP_XMIT_FLAGS_NONE))
> +       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>                 return -EINVAL;
>
>         /* During program transitions its possible adapter->xdp_prog is assigned
> @@ -10054,6 +10063,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
>                 }
>         }
>
> +       if (unlikely(flags & XDP_XMIT_FLUSH))
> +               ixgbe_xdp_ring_update_tail(ring);
> +
>         return n - drops;
>  }
>
> @@ -10072,11 +10084,7 @@ static void ixgbe_xdp_flush(struct net_device *dev)
>         if (unlikely(!ring))
>                 return;
>
> -       /* Force memory writes to complete before letting h/w know there
> -        * are new descriptors to fetch.
> -        */
> -       wmb();
> -       writel(ring->next_to_use, ring->tail);
> +       ixgbe_xdp_ring_update_tail(ring);
>
>         return;
>  }
>

^ permalink raw reply

* [PATCH net-next 0/3] selftests/net: various
From: Willem de Bruijn @ 2018-05-31 16:14 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

A few odds and ends to network tests:

- msg_zerocopy: run as part of kselftest
- udp gso:      add missing bounds test for minimal sizes
- psock_snd:    initial basic conformance test

Willem de Bruijn (3):
  selftests/net: enable msg_zerocopy test
  selftests/net: udpgso: test small gso_size boundary conditions
  selftests/net: add packet socket packet_snd test

 tools/testing/selftests/net/.gitignore      |   1 +
 tools/testing/selftests/net/Makefile        |   4 +-
 tools/testing/selftests/net/msg_zerocopy.sh |   8 +
 tools/testing/selftests/net/psock_snd.c     | 398 ++++++++++++++++++++
 tools/testing/selftests/net/psock_snd.sh    |  98 +++++
 tools/testing/selftests/net/udpgso.c        |  77 +++-
 6 files changed, 582 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/net/psock_snd.c
 create mode 100755 tools/testing/selftests/net/psock_snd.sh

-- 
2.17.0.921.gf22659ad46-goog

^ permalink raw reply

* [PATCH net-next 1/3] selftests/net: enable msg_zerocopy test
From: Willem de Bruijn @ 2018-05-31 16:14 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn
In-Reply-To: <20180531161440.89709-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

The existing msg_zerocopy test takes additional protocol arguments.
Add a variant that takes no arguments and runs all supported variants.
Call this from kselftest.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 tools/testing/selftests/net/Makefile        | 2 +-
 tools/testing/selftests/net/msg_zerocopy.sh | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7cb0f49efdb7..f39100b970af 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -6,7 +6,7 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
-TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index d571d213418d..c43c6debda06 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -21,6 +21,14 @@ readonly DADDR6='fd::2'
 
 readonly path_sysctl_mem="net.core.optmem_max"
 
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+	$0 4 tcp -t 1
+	$0 6 tcp -t 1
+	echo "OK. All tests passed"
+	exit 0
+fi
+
 # Argument parsing
 if [[ "$#" -lt "2" ]]; then
 	echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
-- 
2.17.0.921.gf22659ad46-goog

^ permalink raw reply related

* [PATCH net-next 2/3] selftests/net: udpgso: test small gso_size boundary conditions
From: Willem de Bruijn @ 2018-05-31 16:14 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn
In-Reply-To: <20180531161440.89709-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Verify that udpgso can generate segments smaller than device mtu, down
to the extreme case of 1B gso_size.

Verify that irrespective of gso_size, udpgso restricts the number of
segments it will generate per call (UDP_MAX_SEGMENTS).

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 tools/testing/selftests/net/udpgso.c | 77 +++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 48a0592db938..e279051bc631 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -34,6 +34,10 @@
 #define UDP_SEGMENT		103
 #endif
 
+#ifndef UDP_MAX_SEGMENTS
+#define UDP_MAX_SEGMENTS	(1 << 6UL)
+#endif
+
 #define CONST_MTU_TEST	1500
 
 #define CONST_HDRLEN_V4		(sizeof(struct iphdr) + sizeof(struct udphdr))
@@ -135,6 +139,38 @@ struct testcase testcases_v4[] = {
 		.gso_len = CONST_MSS_V4,
 		.tfail = true,
 	},
+	{
+		/* send a single 1B MSS: will fail, see single MSS above */
+		.tlen = 1,
+		.gso_len = 1,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
 	{
 		/* EOL */
 	}
@@ -210,6 +246,38 @@ struct testcase testcases_v6[] = {
 		.gso_len = CONST_MSS_V6,
 		.tfail = true,
 	},
+	{
+		/* send a single 1B MSS: will fail, see single MSS above */
+		.tlen = 1,
+		.gso_len = 1,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
 	{
 		/* EOL */
 	}
@@ -375,7 +443,8 @@ static bool __send_one(int fd, struct msghdr *msg, int flags)
 	int ret;
 
 	ret = sendmsg(fd, msg, flags);
-	if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM))
+	if (ret == -1 &&
+	    (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
 		return false;
 	if (ret == -1)
 		error(1, errno, "sendmsg");
@@ -466,7 +535,11 @@ static void run_one(struct testcase *test, int fdt, int fdr,
 	if (!sent)
 		return;
 
-	mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+	if (test->gso_len)
+		mss = test->gso_len;
+	else
+		mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
 
 	/* Recv all full MSS datagrams */
 	for (i = 0; i < test->r_num_mss; i++) {
-- 
2.17.0.921.gf22659ad46-goog

^ permalink raw reply related

* [PATCH net-next 3/3] selftests/net: add packet socket packet_snd test
From: Willem de Bruijn @ 2018-05-31 16:14 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn
In-Reply-To: <20180531161440.89709-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Add regression tests for PF_PACKET transmission using packet_snd.

The TPACKET ring interface has tests for transmission and reception.
This is an initial stab at the same for the send call based interface.

Packets are sent over loopback, then read twice. The entire packet is
read from another packet socket and compared. The packet is also
verified to arrive at a UDP socket for protocol conformance.

The test sends a packet over loopback, testing the following options
(not the full cross-product):

- SOCK_DGRAM
- SOCK_RAW
- vlan tag
- qdisc bypass
- bind() and sendto()
- virtio_net_hdr
- csum offload (NOT actual csum feature, ignored on loopback)
- gso

Besides these basic functionality tests, the test runs a set
of bounds checks, positive and negative. Running over loopback, which
has dev->min_header_len, it cannot generate variable length hhlen.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 tools/testing/selftests/net/.gitignore   |   1 +
 tools/testing/selftests/net/Makefile     |   4 +-
 tools/testing/selftests/net/psock_snd.c  | 398 +++++++++++++++++++++++
 tools/testing/selftests/net/psock_snd.sh |  98 ++++++
 4 files changed, 499 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/net/psock_snd.c
 create mode 100755 tools/testing/selftests/net/psock_snd.sh

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index f0e6c35a93ae..128e548aa377 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,6 +1,7 @@
 msg_zerocopy
 socket
 psock_fanout
+psock_snd
 psock_tpacket
 reuseport_bpf
 reuseport_bpf_cpu
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f39100b970af..663e11e85727 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -6,11 +6,11 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
-TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
-TEST_GEN_FILES += tcp_mmap tcp_inq
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 000000000000..3936d5c8adfc
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+static bool	cfg_use_bind;
+static bool	cfg_use_csum_off;
+static bool	cfg_use_csum_off_bad;
+static bool	cfg_use_dgram;
+static bool	cfg_use_gso;
+static bool	cfg_use_qdisc_bypass;
+static bool	cfg_use_vlan;
+static bool	cfg_use_vnet;
+
+static char	*cfg_ifname = "lo";
+static int	cfg_mtu	= 1500;
+static int	cfg_payload_len = DATA_LEN;
+static int	cfg_truncate_len = INT_MAX;
+static uint16_t	cfg_port = 8000;
+
+/* test sending up to max mtu + 1 */
+#define TEST_SZ	(sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
+
+static char tbuf[TEST_SZ], rbuf[TEST_SZ];
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += start[i];
+
+	return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+			      unsigned long sum)
+{
+	sum += add_csum_hword(start, num_u16);
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
+static int build_vnet_header(void *header)
+{
+	struct virtio_net_hdr *vh = header;
+
+	vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+
+	if (cfg_use_csum_off) {
+		vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+		vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+		vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+
+		/* position check field exactly one byte beyond end of packet */
+		if (cfg_use_csum_off_bad)
+			vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
+					  vh->csum_offset - 1;
+	}
+
+	if (cfg_use_gso) {
+		vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+		vh->gso_size = cfg_mtu - sizeof(struct iphdr);
+	}
+
+	return sizeof(*vh);
+}
+
+static int build_eth_header(void *header)
+{
+	struct ethhdr *eth = header;
+
+	if (cfg_use_vlan) {
+		uint16_t *tag = header + ETH_HLEN;
+
+		eth->h_proto = htons(ETH_P_8021Q);
+		tag[1] = htons(ETH_P_IP);
+		return ETH_HLEN + 4;
+	}
+
+	eth->h_proto = htons(ETH_P_IP);
+	return ETH_HLEN;
+}
+
+static int build_ipv4_header(void *header, int payload_len)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+	iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+
+	return iph->ihl << 2;
+}
+
+static int build_udp_header(void *header, int payload_len)
+{
+	const int alen = sizeof(uint32_t);
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(9);
+	udph->dest = htons(cfg_port);
+	udph->len = htons(len);
+
+	if (cfg_use_csum_off)
+		udph->check = build_ip_csum(header - (2 * alen), alen,
+					    htons(IPPROTO_UDP) + udph->len);
+	else
+		udph->check = 0;
+
+	return sizeof(*udph);
+}
+
+static int build_packet(int payload_len)
+{
+	int off = 0;
+
+	off += build_vnet_header(tbuf);
+	off += build_eth_header(tbuf + off);
+	off += build_ipv4_header(tbuf + off, payload_len);
+	off += build_udp_header(tbuf + off, payload_len);
+
+	if (off + payload_len > sizeof(tbuf))
+		error(1, 0, "payload length exceeds max");
+
+	memset(tbuf + off, DATA_CHAR, payload_len);
+
+	return off + payload_len;
+}
+
+static void do_bind(int fd)
+{
+	struct sockaddr_ll laddr = {0};
+
+	laddr.sll_family = AF_PACKET;
+	laddr.sll_protocol = htons(ETH_P_IP);
+	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+	if (!laddr.sll_ifindex)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fd, (void *)&laddr, sizeof(laddr)))
+		error(1, errno, "bind");
+}
+
+static void do_send(int fd, char *buf, int len)
+{
+	int ret;
+
+	if (!cfg_use_vnet) {
+		buf += sizeof(struct virtio_net_hdr);
+		len -= sizeof(struct virtio_net_hdr);
+	}
+	if (cfg_use_dgram) {
+		buf += ETH_HLEN;
+		len -= ETH_HLEN;
+	}
+
+	if (cfg_use_bind) {
+		ret = write(fd, buf, len);
+	} else {
+		struct sockaddr_ll laddr = {0};
+
+		laddr.sll_protocol = htons(ETH_P_IP);
+		laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+		if (!laddr.sll_ifindex)
+			error(1, errno, "if_nametoindex");
+
+		ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
+	}
+
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret != len)
+		error(1, 0, "write: %u %u", ret, len);
+
+	fprintf(stderr, "tx: %u\n", ret);
+}
+
+static int do_tx(void)
+{
+	const int one = 1;
+	int fd, len;
+
+	fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	if (cfg_use_bind)
+		do_bind(fd);
+
+	if (cfg_use_qdisc_bypass &&
+	    setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
+		error(1, errno, "setsockopt qdisc bypass");
+
+	if (cfg_use_vnet &&
+	    setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+		error(1, errno, "setsockopt vnet");
+
+	len = build_packet(cfg_payload_len);
+
+	if (cfg_truncate_len < len)
+		len = cfg_truncate_len;
+
+	do_send(fd, tbuf, len);
+
+	if (close(fd))
+		error(1, errno, "close t");
+
+	return len;
+}
+
+static int setup_rx(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	struct sockaddr_in raddr = {0};
+	int fd;
+
+	fd = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	raddr.sin_family = AF_INET;
+	raddr.sin_port = htons(cfg_port);
+	raddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	if (bind(fd, (void *)&raddr, sizeof(raddr)))
+		error(1, errno, "bind r");
+
+	return fd;
+}
+
+static void do_rx(int fd, int expected_len, char *expected)
+{
+	int ret;
+
+	ret = recv(fd, rbuf, sizeof(rbuf), 0);
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != expected_len)
+		error(1, 0, "recv: %u != %u", ret, expected_len);
+
+	if (memcmp(rbuf, expected, ret))
+		error(1, 0, "recv: data mismatch");
+
+	fprintf(stderr, "rx: %u\n", ret);
+}
+
+static int setup_sniffer(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fd;
+
+	fd = socket(PF_PACKET, SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket p");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	pair_udp_setfilter(fd);
+	do_bind(fd);
+
+	return fd;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
+		switch (c) {
+		case 'b':
+			cfg_use_bind = true;
+			break;
+		case 'c':
+			cfg_use_csum_off = true;
+			break;
+		case 'C':
+			cfg_use_csum_off_bad = true;
+			break;
+		case 'd':
+			cfg_use_dgram = true;
+			break;
+		case 'g':
+			cfg_use_gso = true;
+			break;
+		case 'l':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'q':
+			cfg_use_qdisc_bypass = true;
+			break;
+		case 't':
+			cfg_truncate_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'v':
+			cfg_use_vnet = true;
+			break;
+		case 'V':
+			cfg_use_vlan = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+
+	if (cfg_use_vlan && cfg_use_dgram)
+		error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
+
+	if (cfg_use_csum_off && !cfg_use_vnet)
+		error(1, 0, "option csum offload (-c) requires vnet (-v)");
+
+	if (cfg_use_csum_off_bad && !cfg_use_csum_off)
+		error(1, 0, "option csum bad (-C) requires csum offload (-c)");
+
+	if (cfg_use_gso && !cfg_use_csum_off)
+		error(1, 0, "option gso (-g) requires csum offload (-c)");
+}
+
+static void run_test(void)
+{
+	int fdr, fds, total_len;
+
+	fdr = setup_rx();
+	fds = setup_sniffer();
+
+	total_len = do_tx();
+
+	/* BPF filter accepts only this length, vlan changes MAC */
+	if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
+		do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
+		      tbuf + sizeof(struct virtio_net_hdr));
+
+	do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
+
+	if (close(fds))
+		error(1, errno, "close s");
+	if (close(fdr))
+		error(1, errno, "close r");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (system("ip link set dev lo mtu 1500"))
+		error(1, errno, "ip link set mtu");
+	if (system("ip addr add dev lo 172.17.0.1/24"))
+		error(1, errno, "ip addr add");
+
+	run_test();
+
+	fprintf(stderr, "OK\n\n");
+	return 0;
+}
+
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 000000000000..6331d91b86a6
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of packet socket send regression tests
+
+set -e
+
+readonly mtu=1500
+readonly iphlen=20
+readonly udphlen=8
+
+readonly vnet_hlen=10
+readonly eth_hlen=14
+
+readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
+readonly mss_exceeds="$((${mss} + 1))"
+
+readonly max_mtu=65535
+readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
+readonly max_mss_exceeds="$((${max_mss} + 1))"
+
+# functional checks (not a full cross-product)
+
+echo "dgram"
+./in_netns.sh ./psock_snd -d
+
+echo "dgram bind"
+./in_netns.sh ./psock_snd -d -b
+
+echo "raw"
+./in_netns.sh ./psock_snd
+
+echo "raw bind"
+./in_netns.sh ./psock_snd -b
+
+echo "raw qdisc bypass"
+./in_netns.sh ./psock_snd -q
+
+echo "raw vlan"
+./in_netns.sh ./psock_snd -V
+
+echo "raw vnet hdr"
+./in_netns.sh ./psock_snd -v
+
+echo "raw csum_off"
+./in_netns.sh ./psock_snd -v -c
+
+echo "raw csum_off with bad offset (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -C)
+
+
+# bounds check: send {max, max + 1, min, min - 1} lengths
+
+echo "raw min size"
+./in_netns.sh ./psock_snd -l 0
+
+echo "raw mtu size"
+./in_netns.sh ./psock_snd -l "${mss}"
+
+echo "raw mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
+
+# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
+#
+# echo "raw vlan mtu size"
+# ./in_netns.sh ./psock_snd -V -l "${mss}"
+
+echo "raw vlan mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
+
+echo "dgram mtu size"
+./in_netns.sh ./psock_snd -d -l "${mss}"
+
+echo "dgram mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
+
+echo "raw truncate hlen (fails: does not arrive)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
+
+echo "raw truncate hlen - 1 (fails: EINVAL)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
+
+
+# gso checks: implies -l, because with gso len must exceed gso_size
+
+echo "raw gso min size"
+./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
+
+echo "raw gso min size - 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
+
+echo "raw gso max size"
+./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
+
+echo "raw gso max size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
+
+echo "OK. All tests passed"
-- 
2.17.0.921.gf22659ad46-goog

^ permalink raw reply related

* Re: [bpf-next V2 PATCH 4/8] tun: implement flush flag for ndo_xdp_xmit
From: Song Liu @ 2018-05-31 16:14 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Networking, Daniel Borkmann, Alexei Starovoitov, Song Liu,
	John Fastabend
In-Reply-To: <152775720301.24817.4050947353101519726.stgit@firesoul>

On Thu, May 31, 2018 at 2:00 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> When passed the XDP_XMIT_FLUSH flag tun_xdp_xmit now performs the same
> kind of socket wake up as in tun_xdp_flush(). The wake up code from
> tun_xdp_flush is generalized and shared with tun_xdp_xmit.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  drivers/net/tun.c |   19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index b182b8cdd219..d82a05fb0594 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1285,6 +1285,14 @@ static const struct net_device_ops tun_netdev_ops = {
>         .ndo_get_stats64        = tun_net_get_stats64,
>  };
>
> +static void __tun_xdp_flush_tfile(struct tun_file *tfile)
> +{
> +       /* Notify and wake up reader process */
> +       if (tfile->flags & TUN_FASYNC)
> +               kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
> +       tfile->socket.sk->sk_data_ready(tfile->socket.sk);
> +}
> +
>  static int tun_xdp_xmit(struct net_device *dev, int n,
>                         struct xdp_frame **frames, u32 flags)
>  {
> @@ -1295,7 +1303,7 @@ static int tun_xdp_xmit(struct net_device *dev, int n,
>         int cnt = n;
>         int i;
>
> -       if (unlikely(flags & ~XDP_XMIT_FLAGS_NONE))
> +       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>                 return -EINVAL;
>
>         rcu_read_lock();
> @@ -1325,6 +1333,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n,
>         }
>         spin_unlock(&tfile->tx_ring.producer_lock);
>
> +       if (flags & XDP_XMIT_FLUSH)
> +               __tun_xdp_flush_tfile(tfile);
> +
>         rcu_read_unlock();
>         return cnt - drops;
>  }
> @@ -1353,11 +1364,7 @@ static void tun_xdp_flush(struct net_device *dev)
>
>         tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
>                                             numqueues]);
> -       /* Notify and wake up reader process */
> -       if (tfile->flags & TUN_FASYNC)
> -               kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
> -       tfile->socket.sk->sk_data_ready(tfile->socket.sk);
> -
> +       __tun_xdp_flush_tfile(tfile);
>  out:
>         rcu_read_unlock();
>  }
>

^ permalink raw reply

* Re: [bpf-next V2 PATCH 5/8] virtio_net: implement flush flag for ndo_xdp_xmit
From: Song Liu @ 2018-05-31 16:15 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Networking, Daniel Borkmann, Alexei Starovoitov, Song Liu,
	John Fastabend
In-Reply-To: <152775720805.24817.1149926292475954272.stgit@firesoul>

On Thu, May 31, 2018 at 2:00 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> When passed the XDP_XMIT_FLUSH flag virtnet_xdp_xmit now performs the
> same virtqueue_kick as virtnet_xdp_flush.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  drivers/net/virtio_net.c |    6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4ed823625953..62ba8aadd8e6 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -481,7 +481,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
>         int err;
>         int i;
>
> -       if (unlikely(flags & ~XDP_XMIT_FLAGS_NONE))
> +       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>                 return -EINVAL;
>
>         qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
> @@ -507,6 +507,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
>                         drops++;
>                 }
>         }
> +
> +       if (flags & XDP_XMIT_FLUSH)
> +               virtqueue_kick(sq->vq);
> +
>         return n - drops;
>  }
>
>

^ permalink raw reply

* Re: [PATCH rdma-next v3 00/14] Verbs flow counters support
From: Yishai Hadas @ 2018-05-31 16:15 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Leon Romanovsky, Leon Romanovsky, RDMA mailing list,
	Boris Pismenny, Matan Barak, Michael J . Ruhl, Or Gerlitz,
	Raed Salem, Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20180531134341.18441-1-leon@kernel.org>

On 5/31/2018 4:43 PM, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
> 
> Changelog:
> v2->v3:
>   * Change function mlx5_fc_query signature to hide the details of
>     internal core driver struct mlx5_fc
>   * Add comment to data[] field at struct mlx5_ib_flow_counters_data (mlx5-abi.h)
>   * Use array of struct mlx5_ib_flow_counters_desc to clarify the output
> v1->v2:
>   * Removed conversion from struct mlx5_fc* to void*
>   * Fixed one place with double space in it
>   * Balanced release of hardware handler in case of counters allocation failure
>   * Added Tested-by
>   * Minimize time spent holding mutex lock
>   * Fixed deadlock caused by nested lock in error path
>   * Protect from handler pointer dereference in the error paths
> 
> Not changed: mlx5-abi.h
> 
> v0->v1:
>   * Decouple from DevX submission
>   * Use uverbs_attr_get_obj at counters read method
>   * Added define for max read buffer size (MAX_COUNTERS_BUFF_SIZE)
>   * Removed the struct mlx5_ib_flow_counter basic_flow_cnts and
>     the related structs used, used define instead
>   * Took Matan's patch from DevX
>   * uverbs_free_counters removed void* casting
>   * Added check to bound ncounters value (added define
>   * Changed user supplied data buffer structure to be array of
>     struct <desc,index> pair (applied this change to user space also)
> 
> Not changed:
>   * UAPI files
>   * Addition of uhw to flow
> 
> Thanks


The corresponding rdma-core PR [1] was updated to match this V3 series.
[1] https://github.com/linux-rdma/rdma-core/pull/330

Yishai

> ----------------------------------------------------------------------
>  From Raed:
> 
> This series comes to allow user space applications to monitor real time
> traffic activity and events of the verbs objects it manages, e.g.:
> ibv_qp, ibv_wq, ibv_flow.
> 
> This API enables creation of generic counters and defines their mapping
> to (association with) a verbs object; the current mlx5 driver uses
> this API for flow counters.
> 
> With this API, an application can monitor the entire life cycle of
> object activity, defined here as a static counters attachment.
> This API also allows dynamic counters monitoring of measurement points
> for a partial period in the verbs object life cycle.
> 
> In addition it presents the implementation of the generic counters interface.
> 
> This will be achieved by extending flow creation by adding a new flow count
> specification type which allows the user to associate a previously created
> flow counters using the generic verbs counters interface to the created flow,
> once associated the user could read statistics by using the read function of
> the generic counters interface.
> 
> The API includes:
> 1. create and destroy API for new counters objects
> 2. read the counters values from HW
> 
> Note:
> Attaching API to allow application to define the measurement points per objects
> is a user space only API and this data is passed to kernel when the counted
> object (e.g. flow) is created with the counters object.
> 
> Thanks
> 
> Matan Barak (2):
>    IB/uverbs: Add an ib_uobject getter to ioctl() infrastructure
>    IB/core: Support passing uhw for create_flow
> 
> Or Gerlitz (1):
>    net/mlx5: Use flow counter pointer as input to the query function
> 
> Raed Salem (11):
>    net/mlx5: Export flow counter related API
>    IB/core: Introduce counters object and its create/destroy
>    IB/uverbs: Add create/destroy counters support
>    IB/core: Introduce counters read verb
>    IB/uverbs: Add read counters support
>    IB/core: Add support for flow counters
>    IB/uverbs: Add support for flow counters
>    IB/mlx5: Add counters create and destroy support
>    IB/mlx5: Add flow counters binding support
>    IB/mlx5: Add flow counters read support
>    IB/mlx5: Add counters read support
> 
>   drivers/infiniband/core/Makefile                   |   2 +-
>   drivers/infiniband/core/uverbs.h                   |   2 +
>   drivers/infiniband/core/uverbs_cmd.c               |  88 +++++-
>   drivers/infiniband/core/uverbs_std_types.c         |   3 +-
>   .../infiniband/core/uverbs_std_types_counters.c    | 157 +++++++++++
>   drivers/infiniband/core/uverbs_std_types_cq.c      |  23 +-
>   .../infiniband/core/uverbs_std_types_flow_action.c |   4 +-
>   drivers/infiniband/core/verbs.c                    |   2 +-
>   drivers/infiniband/hw/mlx4/main.c                  |   6 +-
>   drivers/infiniband/hw/mlx5/main.c                  | 305 ++++++++++++++++++++-
>   drivers/infiniband/hw/mlx5/mlx5_ib.h               |  36 +++
>   drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  15 +-
>   drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |   2 -
>   .../net/ethernet/mellanox/mlx5/core/fs_counters.c  |   7 +-
>   include/linux/mlx5/fs.h                            |   4 +
>   include/rdma/ib_verbs.h                            |  43 ++-
>   include/rdma/uverbs_ioctl.h                        |  11 +
>   include/uapi/rdma/ib_user_ioctl_cmds.h             |  21 ++
>   include/uapi/rdma/ib_user_verbs.h                  |  13 +
>   include/uapi/rdma/mlx5-abi.h                       |  24 ++
>   20 files changed, 712 insertions(+), 56 deletions(-)
>   create mode 100644 drivers/infiniband/core/uverbs_std_types_counters.c
> 
> --
> 2.14.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [bpf-next V2 PATCH 6/8] xdp: done implementing ndo_xdp_xmit flush flag for all drivers
From: Song Liu @ 2018-05-31 16:16 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Networking, Daniel Borkmann, Alexei Starovoitov, Song Liu,
	John Fastabend
In-Reply-To: <152775721311.24817.8828370689349824973.stgit@firesoul>

On Thu, May 31, 2018 at 2:00 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> Remove XDP_XMIT_FLAGS_NONE, as all drivers now implement
> a flush operation in their ndo_xdp_xmit call.  The compiler
> will catch any remaining users of XDP_XMIT_FLAGS_NONE.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  include/net/xdp.h |    1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 0c45f0f943ed..a3b71a4dd71d 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -41,7 +41,6 @@ enum xdp_mem_type {
>  };
>
>  /* XDP flags for ndo_xdp_xmit */
> -#define XDP_XMIT_FLAGS_NONE    0U
>  #define XDP_XMIT_FLUSH         (1U << 0)       /* doorbell signal consumer */
>  #define XDP_XMIT_FLAGS_MASK    XDP_XMIT_FLUSH
>
>

^ permalink raw reply

* Re: [bpf-next V2 PATCH 7/8] bpf/xdp: non-map redirect can avoid calling ndo_xdp_flush
From: Song Liu @ 2018-05-31 16:16 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Networking, Daniel Borkmann, Alexei Starovoitov, Song Liu,
	John Fastabend
In-Reply-To: <152775721817.24817.11576562399044807823.stgit@firesoul>

On Thu, May 31, 2018 at 2:00 AM, Jesper Dangaard Brouer
<brouer@redhat.com> wrote:
> This is the first real user of the XDP_XMIT_FLUSH flag.
>
> As pointed out many times, XDP_REDIRECT without using BPF maps is
> significantly slower than the map variant.  This is primarily due to the
> lack of bulking, as the ndo_xdp_flush operation is required after each
> frame (to avoid frames hanging on the egress device).
>
> It is still possible to optimize this case.  Rather than invoking two
> NDO indirect calls, which are very expensive with CONFIG_RETPOLINE,
> instruct ndo_xdp_xmit to flush via the XDP_XMIT_FLUSH flag.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  net/core/filter.c |    3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 6a21dbcad350..6981b4608979 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3056,10 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
>         if (unlikely(!xdpf))
>                 return -EOVERFLOW;
>
> -       sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, 0);
> +       sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
>         if (sent <= 0)
>                 return sent;
> -       dev->netdev_ops->ndo_xdp_flush(dev);
>         return 0;
>  }
>
>

^ permalink raw reply

* Re: [PATCH v2] netfilter: properly initialize xt_table_info structure
From: Greg Kroah-Hartman @ 2018-05-31 16:25 UTC (permalink / raw)
  To: Michal Kubecek
  Cc: peter pi, Florian Westphal, Jan Engelhardt, Eric Dumazet,
	Greg Hackmann, Pablo Neira Ayuso, Jozsef Kadlecsik,
	netfilter-devel, coreteam, netdev
In-Reply-To: <20180531115557.sxfbgtgzy5gh5ldl@unicorn.suse.cz>

On Thu, May 31, 2018 at 01:55:57PM +0200, Michal Kubecek wrote:
> On Thu, May 31, 2018 at 01:32:16PM +0200, Michal Kubecek wrote:
> > I think I start to understand the problem. IPT_SO_GET_ENTRIES leads to
> > calling copy_entries_to_user() which copies the entries as they are to
> > user provided buffer. It also copies instances of struct xt_entry_match
> > and struct xt_entry_target which contain kernel pointers. We then
> > rewrite them with match/target name for userspace but the layout looks
> > (on x86_64) like this
> > 
> > /* offset    |  size */  type = struct xt_entry_match {
> > /*    0      |    32 */    union {
> > /*                32 */        struct {
> > /*    0      |     2 */            __u16 match_size;
> > /*    2      |    29 */            char name[29];
> > /*   31      |     1 */            __u8 revision;
> > 
> >                                    /* total size (bytes):   32 */
> >                                } user;
> > /*                16 */        struct {
> > /*    0      |     2 */            __u16 match_size;
> > /* XXX  6-byte hole  */
> > /*    8      |     8 */            struct xt_match *match;
> > 
> >                                    /* total size (bytes):   16 */
> >                                } kernel;
> > /*                 2 */        __u16 match_size;
> > 
> >                                /* total size (bytes):   32 */
> >                            } u;
> > /*   32      |     0 */    unsigned char data[];
> > 
> >                            /* total size (bytes):   32 */
> >                          }
> > 
> > 
> > so that if match name is no longer than five characters (which is often
> > the case), writing to .u.user.name leaves .u.kernel.match untouched. The
> > same problem exists in struct xt_entry_target.
> 
> And this should no longer happen since the series
> 
>  f32815d21d4d ("xtables: add xt_match, xt_target and data copy_to_user functions")
>  f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers")
>  e47ddb2c4691 ("ip6tables: use match, target and data copy_to_user helpers")
>  244b531bee2b ("arptables: use match, target and data copy_to_user helpers")
>  b5040f6c33a5 ("ebtables: use match, target and data copy_to_user helpers")
>  4915f7bbc402 ("xtables: use match, target and data copy_to_user helpers in compat")
>  ec2318904965 ("xtables: extend matches and targets with .usersize")
> 
> changed the logic in 4.11-rc1.

Thank you so much for the detailed description.  And sorry for digging
up this old issue.  Peter, if you could verify that you do not see this
issue on a kernel newer than 4.11, that would be wonderful.

Michal, do you think it is worth backporting those commits to the 4.9.y
and 4.4.y stable kernels to remove this problem there?

thanks,

greg k-h

^ permalink raw reply

* [PATCH net-next] netfilter: nf_tables: check msg_type before nft_trans_set(trans)
From: Alexey Kodanev @ 2018-05-31 16:53 UTC (permalink / raw)
  To: netfilter-devel
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal, coreteam,
	netdev, Alexey Kodanev

The patch moves the "trans->msg_type == NFT_MSG_NEWSET" check before
using nft_trans_set(trans). Otherwise we can get an out-of-bounds read.

For example, KASAN reported one when running the 0001_cache_handling_0 nft
test. In this case "trans->msg_type" was NFT_MSG_NEWTABLE:

[75517.177808] BUG: KASAN: slab-out-of-bounds in nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75517.279094] Read of size 8 at addr ffff881bdb643fc8 by task nft/7356
...
[75517.375605] CPU: 26 PID: 7356 Comm: nft Tainted: G  E   4.17.0-rc7.1.x86_64 #1
[75517.489587] Hardware name: Oracle Corporation SUN SERVER X4-2
[75517.618129] Call Trace:
[75517.648821]  dump_stack+0xd1/0x13b
[75517.691040]  ? show_regs_print_info+0x5/0x5
[75517.742519]  ? kmsg_dump_rewind_nolock+0xf5/0xf5
[75517.799300]  ? lock_acquire+0x143/0x310
[75517.846738]  print_address_description+0x85/0x3a0
[75517.904547]  kasan_report+0x18d/0x4b0
[75517.949892]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.019153]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.088420]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.157689]  nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.224869]  nf_tables_newsetelem+0x1a5/0x5d0 [nf_tables]
[75518.291024]  ? nft_add_set_elem+0x2280/0x2280 [nf_tables]
[75518.357154]  ? nla_parse+0x1a5/0x300
[75518.401455]  ? kasan_kmalloc+0xa6/0xd0
[75518.447842]  nfnetlink_rcv+0xc43/0x1bdf [nfnetlink]
[75518.507743]  ? nfnetlink_rcv+0x7a5/0x1bdf [nfnetlink]
[75518.569745]  ? nfnl_err_reset+0x3c0/0x3c0 [nfnetlink]
[75518.631711]  ? lock_acquire+0x143/0x310
[75518.679133]  ? netlink_deliver_tap+0x9b/0x1070
[75518.733840]  ? kasan_unpoison_shadow+0x31/0x40
[75518.788542]  netlink_unicast+0x45d/0x680
[75518.837111]  ? __isolate_free_page+0x890/0x890
[75518.891913]  ? netlink_attachskb+0x6b0/0x6b0
[75518.944542]  netlink_sendmsg+0x6fa/0xd30
[75518.993107]  ? netlink_unicast+0x680/0x680
[75519.043758]  ? netlink_unicast+0x680/0x680
[75519.094402]  sock_sendmsg+0xd9/0x160
[75519.138810]  ___sys_sendmsg+0x64d/0x980
[75519.186234]  ? copy_msghdr_from_user+0x350/0x350
[75519.243118]  ? lock_downgrade+0x650/0x650
[75519.292738]  ? do_raw_spin_unlock+0x5d/0x250
[75519.345456]  ? _raw_spin_unlock+0x24/0x30
[75519.395065]  ? __handle_mm_fault+0xbde/0x3410
[75519.448830]  ? sock_setsockopt+0x3d2/0x1940
[75519.500516]  ? __lock_acquire.isra.25+0xdc/0x19d0
[75519.558448]  ? lock_downgrade+0x650/0x650
[75519.608057]  ? __audit_syscall_entry+0x317/0x720
[75519.664960]  ? __fget_light+0x58/0x250
[75519.711325]  ? __sys_sendmsg+0xde/0x170
[75519.758850]  __sys_sendmsg+0xde/0x170
[75519.804193]  ? __ia32_sys_shutdown+0x90/0x90
[75519.856725]  ? syscall_trace_enter+0x897/0x10e0
[75519.912354]  ? trace_event_raw_event_sys_enter+0x920/0x920
[75519.979432]  ? __audit_syscall_entry+0x720/0x720
[75520.036118]  do_syscall_64+0xa3/0x3d0
[75520.081248]  ? prepare_exit_to_usermode+0x47/0x1d0
[75520.139904]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[75520.201680] RIP: 0033:0x7fc153320ba0
[75520.245772] RSP: 002b:00007ffe294c3638 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[75520.337708] RAX: ffffffffffffffda RBX: 00007ffe294c4820 RCX: 00007fc153320ba0
[75520.424547] RDX: 0000000000000000 RSI: 00007ffe294c46b0 RDI: 0000000000000003
[75520.511386] RBP: 00007ffe294c47b0 R08: 0000000000000004 R09: 0000000002114090
[75520.598225] R10: 00007ffe294c30a0 R11: 0000000000000246 R12: 00007ffe294c3660
[75520.684961] R13: 0000000000000001 R14: 00007ffe294c3650 R15: 0000000000000001

[75520.790946] Allocated by task 7356:
[75520.833994]  kasan_kmalloc+0xa6/0xd0
[75520.878088]  __kmalloc+0x189/0x450
[75520.920107]  nft_trans_alloc_gfp+0x20/0x190 [nf_tables]
[75520.983961]  nf_tables_newtable+0xcd0/0x1bd0 [nf_tables]
[75521.048857]  nfnetlink_rcv+0xc43/0x1bdf [nfnetlink]
[75521.108655]  netlink_unicast+0x45d/0x680
[75521.157013]  netlink_sendmsg+0x6fa/0xd30
[75521.205271]  sock_sendmsg+0xd9/0x160
[75521.249365]  ___sys_sendmsg+0x64d/0x980
[75521.296686]  __sys_sendmsg+0xde/0x170
[75521.341822]  do_syscall_64+0xa3/0x3d0
[75521.386957]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[75521.467867] Freed by task 23454:
[75521.507804]  __kasan_slab_free+0x132/0x180
[75521.558137]  kfree+0x14d/0x4d0
[75521.596005]  free_rt_sched_group+0x153/0x280
[75521.648410]  sched_autogroup_create_attach+0x19a/0x520
[75521.711330]  ksys_setsid+0x2ba/0x400
[75521.755529]  __ia32_sys_setsid+0xa/0x10
[75521.802850]  do_syscall_64+0xa3/0x3d0
[75521.848090]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[75521.929000] The buggy address belongs to the object at ffff881bdb643f80
 which belongs to the cache kmalloc-96 of size 96
[75522.079797] The buggy address is located 72 bytes inside of
 96-byte region [ffff881bdb643f80, ffff881bdb643fe0)
[75522.221234] The buggy address belongs to the page:
[75522.280100] page:ffffea006f6d90c0 count:1 mapcount:0 mapping:0000000000000000 index:0x0
[75522.377443] flags: 0x2fffff80000100(slab)
[75522.426956] raw: 002fffff80000100 0000000000000000 0000000000000000 0000000180200020
[75522.521275] raw: ffffea006e6fafc0 0000000c0000000c ffff881bf180f400 0000000000000000
[75522.615601] page dumped because: kasan: bad access detected

Fixes: 37a9cc525525 ("netfilter: nf_tables: add generation mask to sets")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---
 net/netfilter/nf_tables_api.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 91e80aa..dd1f93c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2726,12 +2726,13 @@ static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
 	u32 id = ntohl(nla_get_be32(nla));
 
 	list_for_each_entry(trans, &net->nft.commit_list, list) {
-		struct nft_set *set = nft_trans_set(trans);
+		if (trans->msg_type == NFT_MSG_NEWSET) {
+			struct nft_set *set = nft_trans_set(trans);
 
-		if (trans->msg_type == NFT_MSG_NEWSET &&
-		    id == nft_trans_set_id(trans) &&
-		    nft_active_genmask(set, genmask))
-			return set;
+			if (id == nft_trans_set_id(trans) &&
+			    nft_active_genmask(set, genmask))
+				return set;
+		}
 	}
 	return ERR_PTR(-ENOENT);
 }
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v3 net-next] net: stmmac: Add Flexible PPS support
From: Jose Abreu @ 2018-05-31 17:01 UTC (permalink / raw)
  To: netdev
  Cc: Jose Abreu, David S. Miller, Joao Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue, Richard Cochran

This adds support for Flexible PPS output (which is equivalent
to per_out output of PTP subsystem).

Tested using an oscilloscope and the following commands:

1) Start PTP4L:
	# ptp4l -A -4 -H -m -i eth0 &
2) Set Flexible PPS frequency:
	# echo <idx> <ts> <tns> <ps> <pns> > /sys/class/ptp/ptpX/period

Where, ts/tns is start time and ps/pns is period time, and ptpX is ptp
of eth0.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Vitor Soares <soares@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Richard Cochran <richardcochran@gmail.com>
---
Changes from v2:
	- Remove PPS support as we can't input the event to PTP
	subsystem
Changes from v1:
	- Correct kbuild errors in some archs
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |    2 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4.h      |    1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c |    1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c  |    2 +
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c      |   55 +++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h      |   22 ++++++++
 drivers/net/ethernet/stmicro/stmmac/hwif.h        |    7 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   12 +++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    4 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c  |   42 +++++++++++++++-
 10 files changed, 145 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index a679cb7..78fd0f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -346,6 +346,8 @@ struct dma_features {
 	/* TX and RX number of queues */
 	unsigned int number_rx_queues;
 	unsigned int number_tx_queues;
+	/* PPS output */
+	unsigned int pps_out_num;
 	/* Alternate (enhanced) DESC mode */
 	unsigned int enh_desc;
 	/* TX and RX FIFO sizes */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 6330a55..eb013d5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -187,6 +187,7 @@ enum power_event {
 #define GMAC_HW_RXFIFOSIZE		GENMASK(4, 0)
 
 /* MAC HW features2 bitmap */
+#define GMAC_HW_FEAT_PPSOUTNUM		GENMASK(26, 24)
 #define GMAC_HW_FEAT_TXCHCNT		GENMASK(21, 18)
 #define GMAC_HW_FEAT_RXCHCNT		GENMASK(15, 12)
 #define GMAC_HW_FEAT_TXQCNT		GENMASK(9, 6)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index a7121a7..7e5d5db 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -796,6 +796,7 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
 	.safety_feat_irq_status = dwmac5_safety_feat_irq_status,
 	.safety_feat_dump = dwmac5_safety_feat_dump,
 	.rxp_config = dwmac5_rxp_config,
+	.flex_pps_config = dwmac5_flex_pps_config,
 };
 
 int dwmac4_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index bf8e5a1..d37f17c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -373,6 +373,8 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 		((hw_cap & GMAC_HW_FEAT_RXQCNT) >> 0) + 1;
 	dma_cap->number_tx_queues =
 		((hw_cap & GMAC_HW_FEAT_TXQCNT) >> 6) + 1;
+	/* PPS output */
+	dma_cap->pps_out_num = (hw_cap & GMAC_HW_FEAT_PPSOUTNUM) >> 24;
 
 	/* IEEE 1588-2002 */
 	dma_cap->time_stamp = 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index b2becb8..3f4f313 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -8,6 +8,7 @@
 #include "dwmac4.h"
 #include "dwmac5.h"
 #include "stmmac.h"
+#include "stmmac_ptp.h"
 
 struct dwmac5_error_desc {
 	bool valid;
@@ -494,3 +495,57 @@ int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
 	writel(old_val, ioaddr + GMAC_CONFIG);
 	return ret;
 }
+
+int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
+			   struct stmmac_pps_cfg *cfg, bool enable,
+			   u32 sub_second_inc, u32 systime_flags)
+{
+	u32 tnsec = readl(ioaddr + MAC_PPSx_TARGET_TIME_NSEC(index));
+	u32 val = readl(ioaddr + MAC_PPS_CONTROL);
+	u64 period;
+
+	if (!cfg->available)
+		return -EINVAL;
+	if (tnsec & TRGTBUSY0)
+		return -EBUSY;
+	if (!sub_second_inc || !systime_flags)
+		return -EINVAL;
+
+	val &= ~PPSx_MASK(index);
+
+	if (!enable) {
+		val |= PPSCMDx(index, 0x5);
+		writel(val, ioaddr + MAC_PPS_CONTROL);
+		return 0;
+	}
+
+	val |= PPSCMDx(index, 0x2);
+	val |= TRGTMODSELx(index, 0x2);
+	val |= PPSEN0;
+
+	writel(cfg->start.tv_sec, ioaddr + MAC_PPSx_TARGET_TIME_SEC(index));
+
+	if (!(systime_flags & PTP_TCR_TSCTRLSSR))
+		cfg->start.tv_nsec = (cfg->start.tv_nsec * 1000) / 465;
+	writel(cfg->start.tv_nsec, ioaddr + MAC_PPSx_TARGET_TIME_NSEC(index));
+
+	period = cfg->period.tv_sec * 1000000000;
+	period += cfg->period.tv_nsec;
+
+	do_div(period, sub_second_inc);
+
+	if (period <= 1)
+		return -EINVAL;
+
+	writel(period - 1, ioaddr + MAC_PPSx_INTERVAL(index));
+
+	period >>= 1;
+	if (period <= 1)
+		return -EINVAL;
+
+	writel(period - 1, ioaddr + MAC_PPSx_WIDTH(index));
+
+	/* Finally, activate it */
+	writel(val, ioaddr + MAC_PPS_CONTROL);
+	return 0;
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index cc810af..775db77 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -11,6 +11,25 @@
 #define PRTYEN				BIT(1)
 #define TMOUTEN				BIT(0)
 
+#define MAC_PPS_CONTROL			0x00000b70
+#define PPS_MAXIDX(x)			((((x) + 1) * 8) - 1)
+#define PPS_MINIDX(x)			((x) * 8)
+#define PPSx_MASK(x)			GENMASK(PPS_MAXIDX(x), PPS_MINIDX(x))
+#define MCGRENx(x)			BIT(PPS_MAXIDX(x))
+#define TRGTMODSELx(x, val)		\
+	GENMASK(PPS_MAXIDX(x) - 1, PPS_MAXIDX(x) - 2) & \
+	((val) << (PPS_MAXIDX(x) - 2))
+#define PPSCMDx(x, val)			\
+	GENMASK(PPS_MINIDX(x) + 3, PPS_MINIDX(x)) & \
+	((val) << PPS_MINIDX(x))
+#define PPSEN0				BIT(4)
+#define MAC_PPSx_TARGET_TIME_SEC(x)	(0x00000b80 + ((x) * 0x10))
+#define MAC_PPSx_TARGET_TIME_NSEC(x)	(0x00000b84 + ((x) * 0x10))
+#define TRGTBUSY0			BIT(31)
+#define TTSL0				GENMASK(30, 0)
+#define MAC_PPSx_INTERVAL(x)		(0x00000b88 + ((x) * 0x10))
+#define MAC_PPSx_WIDTH(x)		(0x00000b8c + ((x) * 0x10))
+
 #define MTL_RXP_CONTROL_STATUS		0x00000ca0
 #define RXPI				BIT(31)
 #define NPE				GENMASK(23, 16)
@@ -61,5 +80,8 @@ int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
 			int index, unsigned long *count, const char **desc);
 int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
 		      unsigned int count);
+int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
+			   struct stmmac_pps_cfg *cfg, bool enable,
+			   u32 sub_second_inc, u32 systime_flags);
 
 #endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index f499a7f..e44e7b2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -241,6 +241,7 @@ struct stmmac_dma_ops {
 struct rgmii_adv;
 struct stmmac_safety_stats;
 struct stmmac_tc_entry;
+struct stmmac_pps_cfg;
 
 /* Helpers to program the MAC core */
 struct stmmac_ops {
@@ -313,6 +314,10 @@ struct stmmac_ops {
 	/* Flexible RX Parser */
 	int (*rxp_config)(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
 			  unsigned int count);
+	/* Flexible PPS */
+	int (*flex_pps_config)(void __iomem *ioaddr, int index,
+			       struct stmmac_pps_cfg *cfg, bool enable,
+			       u32 sub_second_inc, u32 systime_flags);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -379,6 +384,8 @@ struct stmmac_ops {
 	stmmac_do_callback(__priv, mac, safety_feat_dump, __args)
 #define stmmac_rxp_config(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, rxp_config, __args)
+#define stmmac_flex_pps_config(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, flex_pps_config, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index fbfe5dc..025efbf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -100,6 +100,13 @@ struct stmmac_tc_entry {
 	} __packed val;
 };
 
+#define STMMAC_PPS_MAX		4
+struct stmmac_pps_cfg {
+	bool available;
+	struct timespec64 start;
+	struct timespec64 period;
+};
+
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
 	u32 tx_count_frames;
@@ -160,6 +167,8 @@ struct stmmac_priv {
 	struct ptp_clock *ptp_clock;
 	struct ptp_clock_info ptp_clock_ops;
 	unsigned int default_addend;
+	u32 sub_second_inc;
+	u32 systime_flags;
 	u32 adv_ts;
 	int use_riwt;
 	int irq_wake;
@@ -181,6 +190,9 @@ struct stmmac_priv {
 	unsigned int tc_entries_max;
 	unsigned int tc_off_max;
 	struct stmmac_tc_entry *tc_entries;
+
+	/* Pulse Per Second output */
+	struct stmmac_pps_cfg pps[STMMAC_PPS_MAX];
 };
 
 enum stmmac_state {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 77af85c..11fb7c7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -721,6 +721,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 				priv->plat->has_gmac4, &sec_inc);
 		temp = div_u64(1000000000ULL, sec_inc);
 
+		/* Store sub second increment and flags for later use */
+		priv->sub_second_inc = sec_inc;
+		priv->systime_flags = value;
+
 		/* calculate default added value:
 		 * formula is :
 		 * addend = (2^32)/freq_div_ratio;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 7d3a5c7..0cb0e39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -140,17 +140,43 @@ static int stmmac_set_time(struct ptp_clock_info *ptp,
 static int stmmac_enable(struct ptp_clock_info *ptp,
 			 struct ptp_clock_request *rq, int on)
 {
-	return -EOPNOTSUPP;
+	struct stmmac_priv *priv =
+	    container_of(ptp, struct stmmac_priv, ptp_clock_ops);
+	struct stmmac_pps_cfg *cfg;
+	int ret = -EOPNOTSUPP;
+	unsigned long flags;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PEROUT:
+		cfg = &priv->pps[rq->perout.index];
+
+		cfg->start.tv_sec = rq->perout.start.sec;
+		cfg->start.tv_nsec = rq->perout.start.nsec;
+		cfg->period.tv_sec = rq->perout.period.sec;
+		cfg->period.tv_nsec = rq->perout.period.nsec;
+
+		spin_lock_irqsave(&priv->ptp_lock, flags);
+		ret = stmmac_flex_pps_config(priv, priv->ioaddr,
+					     rq->perout.index, cfg, on,
+					     priv->sub_second_inc,
+					     priv->systime_flags);
+		spin_unlock_irqrestore(&priv->ptp_lock, flags);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
 }
 
 /* structure describing a PTP hardware clock */
-static const struct ptp_clock_info stmmac_ptp_clock_ops = {
+static struct ptp_clock_info stmmac_ptp_clock_ops = {
 	.owner = THIS_MODULE,
 	.name = "stmmac_ptp_clock",
 	.max_adj = 62500000,
 	.n_alarm = 0,
 	.n_ext_ts = 0,
-	.n_per_out = 0,
+	.n_per_out = 0, /* will be overwritten in stmmac_ptp_register */
 	.n_pins = 0,
 	.pps = 0,
 	.adjfreq = stmmac_adjust_freq,
@@ -168,6 +194,16 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
  */
 void stmmac_ptp_register(struct stmmac_priv *priv)
 {
+	int i;
+
+	for (i = 0; i < priv->dma_cap.pps_out_num; i++) {
+		if (i >= STMMAC_PPS_MAX)
+			break;
+		priv->pps[i].available = true;
+	}
+
+	stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num;
+
 	spin_lock_init(&priv->ptp_lock);
 	priv->ptp_clock_ops = stmmac_ptp_clock_ops;
 
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH rdma-next v3 10/14] IB/uverbs: Add support for flow counters
From: Leon Romanovsky @ 2018-05-31 17:23 UTC (permalink / raw)
  To: Ruhl, Michael J
  Cc: Doug Ledford, Jason Gunthorpe, RDMA mailing list, Boris Pismenny,
	Matan Barak, Or Gerlitz, Raed Salem, Yishai Hadas, Saeed Mahameed,
	linux-netdev
In-Reply-To: <14063C7AD467DE4B82DEDB5C278E8663B38F0661@FMSMSX108.amr.corp.intel.com>

[-- Attachment #1: Type: text/plain, Size: 6930 bytes --]

On Thu, May 31, 2018 at 02:49:44PM +0000, Ruhl, Michael J wrote:
> >-----Original Message-----
> >From: Leon Romanovsky [mailto:leon@kernel.org]
> >Sent: Thursday, May 31, 2018 9:44 AM
> >To: Doug Ledford <dledford@redhat.com>; Jason Gunthorpe
> ><jgg@mellanox.com>
> >Cc: Leon Romanovsky <leonro@mellanox.com>; RDMA mailing list <linux-
> >rdma@vger.kernel.org>; Boris Pismenny <borisp@mellanox.com>; Matan
> >Barak <matanb@mellanox.com>; Ruhl, Michael J <michael.j.ruhl@intel.com>;
> >Or Gerlitz <ogerlitz@mellanox.com>; Raed Salem <raeds@mellanox.com>;
> >Yishai Hadas <yishaih@mellanox.com>; Saeed Mahameed
> ><saeedm@mellanox.com>; linux-netdev <netdev@vger.kernel.org>
> >Subject: [PATCH rdma-next v3 10/14] IB/uverbs: Add support for flow
> >counters
> >
> >From: Raed Salem <raeds@mellanox.com>
> >
> >The struct ib_uverbs_flow_spec_action_count associates
> >a counters object with the flow.
> >
> >Post this association the flow counters can be read via
> >the counters object.
> >
> >Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
> >Signed-off-by: Raed Salem <raeds@mellanox.com>
> >Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> >---
> > drivers/infiniband/core/uverbs.h     |  1 +
> > drivers/infiniband/core/uverbs_cmd.c | 81
> >+++++++++++++++++++++++++++++++-----
> > include/uapi/rdma/ib_user_verbs.h    | 13 ++++++
> > 3 files changed, 84 insertions(+), 11 deletions(-)
> >
> >diff --git a/drivers/infiniband/core/uverbs.h
> >b/drivers/infiniband/core/uverbs.h
> >index 5b2461fa634d..c0d40fc3a53a 100644
> >--- a/drivers/infiniband/core/uverbs.h
> >+++ b/drivers/infiniband/core/uverbs.h
> >@@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec {
> > 		struct ib_uverbs_flow_spec_action_tag	flow_tag;
> > 		struct ib_uverbs_flow_spec_action_drop	drop;
> > 		struct ib_uverbs_flow_spec_action_handle action;
> >+		struct ib_uverbs_flow_spec_action_count flow_count;
> > 	};
> > };
> >
> >diff --git a/drivers/infiniband/core/uverbs_cmd.c
> >b/drivers/infiniband/core/uverbs_cmd.c
> >index ddb9d79691be..3179a95c6f5e 100644
> >--- a/drivers/infiniband/core/uverbs_cmd.c
> >+++ b/drivers/infiniband/core/uverbs_cmd.c
> >@@ -2748,43 +2748,82 @@ ssize_t ib_uverbs_detach_mcast(struct
> >ib_uverbs_file *file,
> > struct ib_uflow_resources {
> > 	size_t			max;
> > 	size_t			num;
> >-	struct ib_flow_action	*collection[0];
> >+	size_t			collection_num;
> >+	size_t			counters_num;
> >+	struct ib_counters	**counters;
> >+	struct ib_flow_action	**collection;
> > };
> >
> > static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
> > {
> > 	struct ib_uflow_resources *resources;
> >
> >-	resources =
> >-		kmalloc(sizeof(*resources) +
> >-			num_specs * sizeof(*resources->collection),
> >GFP_KERNEL);
> >+	resources = kzalloc(sizeof(*resources), GFP_KERNEL);
> >
> > 	if (!resources)
> >-		return NULL;
> >+		goto err_res;
>
> Why the new goto?

No real reason :)

>
> >+
> >+	resources->counters =
> >+		kcalloc(num_specs, sizeof(*resources->counters),
> >GFP_KERNEL);
> >+
> >+	if (!resources->counters)
> >+		goto err_cnt;
>
> kcalloc() zeros stuff.  Could you just have a single common goto for the
> cleanup?

I have mixed feelings about such an approach: technically you are
right, but I think that it will hurt readability.

I can send a follow-up patch; will that work for you?

Thanks for the review.

>
> Mike
>
> >+
> >+	resources->collection =
> >+		kcalloc(num_specs, sizeof(*resources->collection),
> >GFP_KERNEL);
> >+
> >+	if (!resources->collection)
> >+		goto err_collection;
> >
> >-	resources->num = 0;
> > 	resources->max = num_specs;
> >
> > 	return resources;
> >+
> >+err_collection:
> >+	kfree(resources->counters);
> >+err_cnt:
> >+	kfree(resources);
> >+err_res:
> >+	return NULL;
> > }
> >
> > void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
> > {
> > 	unsigned int i;
> >
> >-	for (i = 0; i < uflow_res->num; i++)
> >+	for (i = 0; i < uflow_res->collection_num; i++)
> > 		atomic_dec(&uflow_res->collection[i]->usecnt);
> >
> >+	for (i = 0; i < uflow_res->counters_num; i++)
> >+		atomic_dec(&uflow_res->counters[i]->usecnt);
> >+
> >+	kfree(uflow_res->collection);
> >+	kfree(uflow_res->counters);
> > 	kfree(uflow_res);
> > }
> >
> > static void flow_resources_add(struct ib_uflow_resources *uflow_res,
> >-			       struct ib_flow_action *action)
> >+			       enum ib_flow_spec_type type,
> >+			       void *ibobj)
> > {
> > 	WARN_ON(uflow_res->num >= uflow_res->max);
> >
> >-	atomic_inc(&action->usecnt);
> >-	uflow_res->collection[uflow_res->num++] = action;
> >+	switch (type) {
> >+	case IB_FLOW_SPEC_ACTION_HANDLE:
> >+		atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
> >+		uflow_res->collection[uflow_res->collection_num++] =
> >+			(struct ib_flow_action *)ibobj;
> >+		break;
> >+	case IB_FLOW_SPEC_ACTION_COUNT:
> >+		atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
> >+		uflow_res->counters[uflow_res->counters_num++] =
> >+			(struct ib_counters *)ibobj;
> >+		break;
> >+	default:
> >+		WARN_ON(1);
> >+	}
> >+
> >+	uflow_res->num++;
> > }
> >
> > static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
> >@@ -2821,9 +2860,29 @@ static int kern_spec_to_ib_spec_action(struct
> >ib_ucontext *ucontext,
> > 			return -EINVAL;
> > 		ib_spec->action.size =
> > 			sizeof(struct ib_flow_spec_action_handle);
> >-		flow_resources_add(uflow_res, ib_spec->action.act);
> >+		flow_resources_add(uflow_res,
> >+				   IB_FLOW_SPEC_ACTION_HANDLE,
> >+				   ib_spec->action.act);
> > 		uobj_put_obj_read(ib_spec->action.act);
> > 		break;
> >+	case IB_FLOW_SPEC_ACTION_COUNT:
> >+		if (kern_spec->flow_count.size !=
> >+			sizeof(struct ib_uverbs_flow_spec_action_count))
> >+			return -EINVAL;
> >+		ib_spec->flow_count.counters =
> >+			uobj_get_obj_read(counters,
> >+					  UVERBS_OBJECT_COUNTERS,
> >+					  kern_spec->flow_count.handle,
> >+					  ucontext);
> >+		if (!ib_spec->flow_count.counters)
> >+			return -EINVAL;
> >+		ib_spec->flow_count.size =
> >+				sizeof(struct ib_flow_spec_action_count);
> >+		flow_resources_add(uflow_res,
> >+				   IB_FLOW_SPEC_ACTION_COUNT,
> >+				   ib_spec->flow_count.counters);
> >+		uobj_put_obj_read(ib_spec->flow_count.counters);
> >+		break;
> > 	default:
> > 		return -EINVAL;
> > 	}
> >diff --git a/include/uapi/rdma/ib_user_verbs.h
> >b/include/uapi/rdma/ib_user_verbs.h
> >index 409507f83b91..4f9991de8e3a 100644
> >--- a/include/uapi/rdma/ib_user_verbs.h
> >+++ b/include/uapi/rdma/ib_user_verbs.h
> >@@ -998,6 +998,19 @@ struct ib_uverbs_flow_spec_action_handle {
> > 	__u32			      reserved1;
> > };
> >
> >+struct ib_uverbs_flow_spec_action_count {
> >+	union {
> >+		struct ib_uverbs_flow_spec_hdr hdr;
> >+		struct {
> >+			__u32 type;
> >+			__u16 size;
> >+			__u16 reserved;
> >+		};
> >+	};
> >+	__u32			      handle;
> >+	__u32			      reserved1;
> >+};
> >+
> > struct ib_uverbs_flow_tunnel_filter {
> > 	__be32 tunnel_id;
> > };
> >--
> >2.14.3
>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox