* [PATCH mlx5-next v2 1/6] net/mlx5: Refactor HCA capability set flow
2020-04-13 13:36 [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
@ 2020-04-13 13:36 ` Leon Romanovsky
2020-04-13 20:47 ` Saeed Mahameed
2020-04-13 13:36 ` [PATCH mlx5-next v2 2/6] net/mlx5: Enable SW-defined RoCEv2 UDP source port Leon Romanovsky
` (5 subsequent siblings)
6 siblings, 1 reply; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-13 13:36 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Leon Romanovsky, linux-rdma, netdev, Saeed Mahameed
From: Leon Romanovsky <leonro@mellanox.com>
Reduce the amount of kzalloc/kfree cycles by allocating
command structure in the parent function and leverage the
knowledge that set_caps() is called for HCA capabilities
only with specific HW structure as parameter to calculate
mailbox size.
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/main.c | 66 +++++++------------
1 file changed, 24 insertions(+), 42 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7af4210c1b96..8b9182add689 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -407,20 +407,19 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
{
- u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {};
MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
- return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ return mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(set_hca_cap_in), out,
+ sizeof(out));
}
-static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
{
- void *set_ctx;
void *set_hca_cap;
- int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
int req_endianness;
int err;
@@ -439,27 +438,19 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
return 0;
- set_ctx = kzalloc(set_sz, GFP_KERNEL);
- if (!set_ctx)
- return -ENOMEM;
-
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
/* Set requestor to host endianness */
MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
- err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
-
- kfree(set_ctx);
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
return err;
}
-static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
{
void *set_hca_cap;
- void *set_ctx;
- int set_sz;
bool do_set = false;
int err;
@@ -471,11 +462,6 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
if (err)
return err;
- set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
- set_ctx = kzalloc(set_sz, GFP_KERNEL);
- if (!set_ctx)
- return -ENOMEM;
-
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
MLX5_ST_SZ_BYTES(odp_cap));
@@ -505,29 +491,20 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
if (do_set)
- err = set_caps(dev, set_ctx, set_sz,
- MLX5_SET_HCA_CAP_OP_MOD_ODP);
-
- kfree(set_ctx);
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
return err;
}
-static int handle_hca_cap(struct mlx5_core_dev *dev)
+static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
{
- void *set_ctx = NULL;
struct mlx5_profile *prof = dev->profile;
- int err = -ENOMEM;
- int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
void *set_hca_cap;
-
- set_ctx = kzalloc(set_sz, GFP_KERNEL);
- if (!set_ctx)
- goto query_ex;
+ int err;
err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
if (err)
- goto query_ex;
+ return err;
set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
capability);
@@ -578,37 +555,42 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
num_vhca_ports,
MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
- err = set_caps(dev, set_ctx, set_sz,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
-
-query_ex:
- kfree(set_ctx);
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
return err;
}
static int set_hca_cap(struct mlx5_core_dev *dev)
{
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_ctx;
int err;
- err = handle_hca_cap(dev);
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ err = handle_hca_cap(dev, set_ctx);
if (err) {
mlx5_core_err(dev, "handle_hca_cap failed\n");
goto out;
}
- err = handle_hca_cap_atomic(dev);
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_atomic(dev, set_ctx);
if (err) {
mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
goto out;
}
- err = handle_hca_cap_odp(dev);
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_odp(dev, set_ctx);
if (err) {
mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
goto out;
}
out:
+ kfree(set_ctx);
return err;
}
--
2.25.2
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH mlx5-next v2 1/6] net/mlx5: Refactor HCA capability set flow
2020-04-13 13:36 ` [PATCH mlx5-next v2 1/6] net/mlx5: Refactor HCA capability set flow Leon Romanovsky
@ 2020-04-13 20:47 ` Saeed Mahameed
2020-04-14 7:23 ` Leon Romanovsky
0 siblings, 1 reply; 10+ messages in thread
From: Saeed Mahameed @ 2020-04-13 20:47 UTC (permalink / raw)
To: Jason Gunthorpe, leon@kernel.org, dledford@redhat.com
Cc: netdev@vger.kernel.org, Leon Romanovsky,
linux-rdma@vger.kernel.org
On Mon, 2020-04-13 at 16:36 +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
>
> Reduce the amount of kzalloc/kfree cycles by allocating
> command structure in the parent function and leverage the
> knowledge that set_caps() is called for HCA capabilities
> only with specific HW structure as parameter to calculate
> mailbox size.
>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> ---
> .../net/ethernet/mellanox/mlx5/core/main.c | 66 +++++++--------
> ----
> 1 file changed, 24 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> index 7af4210c1b96..8b9182add689 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> @@ -407,20 +407,19 @@ int mlx5_core_get_caps(struct mlx5_core_dev
> *dev, enum mlx5_cap_type cap_type)
> return mlx5_core_get_caps_mode(dev, cap_type,
> HCA_CAP_OPMOD_GET_MAX);
> }
>
> -static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz,
> int opmod)
> +static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
> {
> - u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
> + u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {};
>
> MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
> MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
> - return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
> + return mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(set_hca_cap_in),
> out,
> + sizeof(out));
> }
>
> -static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
> +static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void
> *set_ctx)
> {
> - void *set_ctx;
> void *set_hca_cap;
> - int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
> int req_endianness;
> int err;
>
> @@ -439,27 +438,19 @@ static int handle_hca_cap_atomic(struct
> mlx5_core_dev *dev)
> if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
> return 0;
>
> - set_ctx = kzalloc(set_sz, GFP_KERNEL);
> - if (!set_ctx)
> - return -ENOMEM;
> -
> set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
> capability);
>
> /* Set requestor to host endianness */
> MLX5_SET(atomic_caps, set_hca_cap,
> atomic_req_8B_endianness_mode,
> MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
>
> - err = set_caps(dev, set_ctx, set_sz,
> MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
> -
> - kfree(set_ctx);
> + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
> return err;
> }
>
> -static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
> +static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void
> *set_ctx)
> {
> void *set_hca_cap;
> - void *set_ctx;
> - int set_sz;
> bool do_set = false;
> int err;
>
> @@ -471,11 +462,6 @@ static int handle_hca_cap_odp(struct
> mlx5_core_dev *dev)
> if (err)
> return err;
>
> - set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
> - set_ctx = kzalloc(set_sz, GFP_KERNEL);
> - if (!set_ctx)
> - return -ENOMEM;
> -
> set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
> capability);
> memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
> MLX5_ST_SZ_BYTES(odp_cap));
> @@ -505,29 +491,20 @@ static int handle_hca_cap_odp(struct
> mlx5_core_dev *dev)
> ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
>
> if (do_set)
assigning to err is now redundant in all of the functions and the next
patch.
you can do early exits when required and then just "return set_caps();"
in this example:
if (!do_set)
return 0;
return set_caps(...);
> - err = set_caps(dev, set_ctx, set_sz,
> - MLX5_SET_HCA_CAP_OP_MOD_ODP);
> -
> - kfree(set_ctx);
> + err = set_caps(dev, set_ctx,
> MLX5_SET_HCA_CAP_OP_MOD_ODP);
>
> return err;
> }
>
> -static int handle_hca_cap(struct mlx5_core_dev *dev)
> +static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
> {
> - void *set_ctx = NULL;
> struct mlx5_profile *prof = dev->profile;
> - int err = -ENOMEM;
> - int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
> void *set_hca_cap;
> -
> - set_ctx = kzalloc(set_sz, GFP_KERNEL);
> - if (!set_ctx)
> - goto query_ex;
> + int err;
>
> err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
> if (err)
> - goto query_ex;
> + return err;
>
> set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
> capability);
> @@ -578,37 +555,42 @@ static int handle_hca_cap(struct mlx5_core_dev
> *dev)
> num_vhca_ports,
> MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
>
> - err = set_caps(dev, set_ctx, set_sz,
> - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
> -
> -query_ex:
> - kfree(set_ctx);
> + err = set_caps(dev, set_ctx,
> MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
> return err;
just return set_caps();
other than this the series is ok,
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
> }
>
> static int set_hca_cap(struct mlx5_core_dev *dev)
> {
> + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
> + void *set_ctx;
> int err;
>
> - err = handle_hca_cap(dev);
> + set_ctx = kzalloc(set_sz, GFP_KERNEL);
> + if (!set_ctx)
> + return -ENOMEM;
> +
> + err = handle_hca_cap(dev, set_ctx);
> if (err) {
> mlx5_core_err(dev, "handle_hca_cap failed\n");
> goto out;
> }
>
> - err = handle_hca_cap_atomic(dev);
> + memset(set_ctx, 0, set_sz);
> + err = handle_hca_cap_atomic(dev, set_ctx);
> if (err) {
> mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
> goto out;
> }
>
> - err = handle_hca_cap_odp(dev);
> + memset(set_ctx, 0, set_sz);
> + err = handle_hca_cap_odp(dev, set_ctx);
> if (err) {
> mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
> goto out;
> }
>
> out:
> + kfree(set_ctx);
> return err;
> }
>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH mlx5-next v2 1/6] net/mlx5: Refactor HCA capability set flow
2020-04-13 20:47 ` Saeed Mahameed
@ 2020-04-14 7:23 ` Leon Romanovsky
0 siblings, 0 replies; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-14 7:23 UTC (permalink / raw)
To: Saeed Mahameed
Cc: Jason Gunthorpe, dledford@redhat.com, netdev@vger.kernel.org,
linux-rdma@vger.kernel.org
On Mon, Apr 13, 2020 at 08:47:39PM +0000, Saeed Mahameed wrote:
> On Mon, 2020-04-13 at 16:36 +0300, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@mellanox.com>
> >
> > Reduce the amount of kzalloc/kfree cycles by allocating
> > command structure in the parent function and leverage the
> > knowledge that set_caps() is called for HCA capabilities
> > only with specific HW structure as parameter to calculate
> > mailbox size.
> >
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > ---
> > .../net/ethernet/mellanox/mlx5/core/main.c | 66 +++++++--------
> > ----
> > 1 file changed, 24 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> > b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> > index 7af4210c1b96..8b9182add689 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> > @@ -407,20 +407,19 @@ int mlx5_core_get_caps(struct mlx5_core_dev
> > *dev, enum mlx5_cap_type cap_type)
> > return mlx5_core_get_caps_mode(dev, cap_type,
> > HCA_CAP_OPMOD_GET_MAX);
> > }
> >
> > -static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz,
> > int opmod)
> > +static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
> > {
> > - u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
> > + u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {};
> >
> > MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
> > MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
> > - return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
> > + return mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(set_hca_cap_in),
> > out,
> > + sizeof(out));
> > }
> >
> > -static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
> > +static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void
> > *set_ctx)
> > {
> > - void *set_ctx;
> > void *set_hca_cap;
> > - int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
> > int req_endianness;
> > int err;
> >
> > @@ -439,27 +438,19 @@ static int handle_hca_cap_atomic(struct
> > mlx5_core_dev *dev)
> > if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
> > return 0;
> >
> > - set_ctx = kzalloc(set_sz, GFP_KERNEL);
> > - if (!set_ctx)
> > - return -ENOMEM;
> > -
> > set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
> > capability);
> >
> > /* Set requestor to host endianness */
> > MLX5_SET(atomic_caps, set_hca_cap,
> > atomic_req_8B_endianness_mode,
> > MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
> >
> > - err = set_caps(dev, set_ctx, set_sz,
> > MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
> > -
> > - kfree(set_ctx);
> > + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
> > return err;
> > }
> >
> > -static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
> > +static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void
> > *set_ctx)
> > {
> > void *set_hca_cap;
> > - void *set_ctx;
> > - int set_sz;
> > bool do_set = false;
> > int err;
> >
> > @@ -471,11 +462,6 @@ static int handle_hca_cap_odp(struct
> > mlx5_core_dev *dev)
> > if (err)
> > return err;
> >
> > - set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
> > - set_ctx = kzalloc(set_sz, GFP_KERNEL);
> > - if (!set_ctx)
> > - return -ENOMEM;
> > -
> > set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
> > capability);
> > memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
> > MLX5_ST_SZ_BYTES(odp_cap));
> > @@ -505,29 +491,20 @@ static int handle_hca_cap_odp(struct
> > mlx5_core_dev *dev)
> > ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
> >
> > if (do_set)
>
> assigning to err is now redundant in all of the functions and the next
> patch.
>
> you can do early exits when required and then just "return set_caps();"
>
>
> in this example:
>
> if (!do_set)
> return 0;
>
> return set_caps(...);
>
>
> > - err = set_caps(dev, set_ctx, set_sz,
> > - MLX5_SET_HCA_CAP_OP_MOD_ODP);
> > -
> > - kfree(set_ctx);
> > + err = set_caps(dev, set_ctx,
> > MLX5_SET_HCA_CAP_OP_MOD_ODP);
> >
> > return err;
> > }
> >
> > -static int handle_hca_cap(struct mlx5_core_dev *dev)
> > +static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
> > {
> > - void *set_ctx = NULL;
> > struct mlx5_profile *prof = dev->profile;
> > - int err = -ENOMEM;
> > - int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
> > void *set_hca_cap;
> > -
> > - set_ctx = kzalloc(set_sz, GFP_KERNEL);
> > - if (!set_ctx)
> > - goto query_ex;
> > + int err;
> >
> > err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
> > if (err)
> > - goto query_ex;
> > + return err;
> >
> > set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
> > capability);
> > @@ -578,37 +555,42 @@ static int handle_hca_cap(struct mlx5_core_dev
> > *dev)
> > num_vhca_ports,
> > MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
> >
> > - err = set_caps(dev, set_ctx, set_sz,
> > - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
> > -
> > -query_ex:
> > - kfree(set_ctx);
> > + err = set_caps(dev, set_ctx,
> > MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
> > return err;
>
> just return set_caps();
>
> other than this the series is ok,
>
> Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Thanks, I'll fix it when I take it to mlx5-next.
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH mlx5-next v2 2/6] net/mlx5: Enable SW-defined RoCEv2 UDP source port
2020-04-13 13:36 [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
2020-04-13 13:36 ` [PATCH mlx5-next v2 1/6] net/mlx5: Refactor HCA capability set flow Leon Romanovsky
@ 2020-04-13 13:36 ` Leon Romanovsky
2020-04-13 13:37 ` [PATCH rdma-next v2 3/6] RDMA/core: Add hash functions to calculate RoCEv2 flowlabel and " Leon Romanovsky
` (4 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-13 13:36 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: Mark Zhang, linux-rdma, Maor Gottlieb, netdev, Saeed Mahameed
From: Mark Zhang <markz@mellanox.com>
When this is enabled, the UDP source port for RoCEv2 packets is defined
by software instead of firmware.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/main.c | 32 +++++++++++++++++++
include/linux/mlx5/mlx5_ifc.h | 5 ++-
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8b9182add689..bbe1a2110204 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -559,6 +559,31 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
return err;
}
+static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
+{
+ void *set_hca_cap;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, roce))
+ return 0;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) ||
+ !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port))
+ return 0;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ROCE],
+ MLX5_ST_SZ_BYTES(roce_cap));
+ MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
+
+ err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
+ return err;
+}
+
static int set_hca_cap(struct mlx5_core_dev *dev)
{
int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
@@ -589,6 +614,13 @@ static int set_hca_cap(struct mlx5_core_dev *dev)
goto out;
}
+ memset(set_ctx, 0, set_sz);
+ err = handle_hca_cap_roce(dev, set_ctx);
+ if (err) {
+ mlx5_core_err(dev, "handle_hca_cap_roce failed\n");
+ goto out;
+ }
+
out:
kfree(set_ctx);
return err;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a3b6c92e889e..95dd9cc1d979 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -74,6 +74,7 @@ enum {
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0,
MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2,
MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
+ MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4,
};
enum {
@@ -903,7 +904,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
struct mlx5_ifc_roce_cap_bits {
u8 roce_apm[0x1];
- u8 reserved_at_1[0x1f];
+ u8 reserved_at_1[0x3];
+ u8 sw_r_roce_src_udp_port[0x1];
+ u8 reserved_at_5[0x1b];
u8 reserved_at_20[0x60];
--
2.25.2
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH rdma-next v2 3/6] RDMA/core: Add hash functions to calculate RoCEv2 flowlabel and UDP source port
2020-04-13 13:36 [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
2020-04-13 13:36 ` [PATCH mlx5-next v2 1/6] net/mlx5: Refactor HCA capability set flow Leon Romanovsky
2020-04-13 13:36 ` [PATCH mlx5-next v2 2/6] net/mlx5: Enable SW-defined RoCEv2 UDP source port Leon Romanovsky
@ 2020-04-13 13:37 ` Leon Romanovsky
2020-04-13 13:37 ` [PATCH rdma-next v2 4/6] RDMA/mlx5: Define RoCEv2 udp source port when set path Leon Romanovsky
` (3 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-13 13:37 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma, Maor Gottlieb
From: Mark Zhang <markz@mellanox.com>
Add two hash functions to distribute RoCE v2 UDP source and Flowlabel
symmetrically. These are user visible API and any change in the
implementation needs to be tested for inter-operability between old and
new variant.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
include/rdma/ib_verbs.h | 44 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 60f9969b6d83..8763d4a06eb7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4703,4 +4703,48 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
bool rdma_dev_access_netns(const struct ib_device *device,
const struct net *net);
+
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
+
+/**
+ * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
+ * on the flow_label
+ *
+ * This function will convert the 20 bit flow_label input to a valid RoCE v2
+ * UDP src port 14 bit value. All RoCE V2 drivers should use this same
+ * convention.
+ */
+static inline u16 rdma_flow_label_to_udp_sport(u32 fl)
+{
+ u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000;
+
+ fl_low ^= fl_high >> 14;
+ return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
+}
+
+/**
+ * rdma_calc_flow_label - generate a RDMA symmetric flow label value based on
+ * local and remote qpn values
+ *
+ * This function folds the multiplication result of the two qpn fields,
+ * 24 bits each, and converts it to a 20-bit result.
+ *
+ * This function will create symmetric flow_label value based on the local
+ * and remote qpn values. this will allow both the requester and responder
+ * to calculate the same flow_label for a given connection.
+ *
+ * This helper function should be used by driver in case the upper layer
+ * provide a zero flow_label value. This is to improve entropy of RDMA
+ * traffic in the network.
+ */
+static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
+{
+ u64 v = (u64)lqpn * rqpn;
+
+ v ^= v >> 20;
+ v ^= v >> 40;
+
+ return (u32)(v & IB_GRH_FLOWLABEL_MASK);
+}
#endif /* IB_VERBS_H */
--
2.25.2
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH rdma-next v2 4/6] RDMA/mlx5: Define RoCEv2 udp source port when set path
2020-04-13 13:36 [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
` (2 preceding siblings ...)
2020-04-13 13:37 ` [PATCH rdma-next v2 3/6] RDMA/core: Add hash functions to calculate RoCEv2 flowlabel and " Leon Romanovsky
@ 2020-04-13 13:37 ` Leon Romanovsky
2020-04-13 13:37 ` [PATCH rdma-next v2 5/6] RDMA/cma: Initialize the flow label of CM's route path record Leon Romanovsky
` (2 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-13 13:37 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma, Maor Gottlieb
From: Mark Zhang <markz@mellanox.com>
Calculate and set UDP source port based on the flow label. If flow label is
not defined in GRH then calculate it based on lqpn/rqpn.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/qp.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 9e9ad69152f7..e7083ab3bcd2 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2967,6 +2967,21 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
return err;
}
+static void mlx5_set_path_udp_sport(struct mlx5_qp_path *path,
+ const struct rdma_ah_attr *ah,
+ u32 lqpn, u32 rqpn)
+
+{
+ u32 fl = ah->grh.flow_label;
+ u16 sport;
+
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ sport = rdma_flow_label_to_udp_sport(fl);
+ path->udp_sport = cpu_to_be16(sport);
+}
+
static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
const struct rdma_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
@@ -2998,12 +3013,15 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EINVAL;
memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
- if (qp->ibqp.qp_type == IB_QPT_RC ||
- qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- qp->ibqp.qp_type == IB_QPT_XRC_TGT)
- path->udp_sport =
- mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
+ if ((qp->ibqp.qp_type == IB_QPT_RC ||
+ qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
+ (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (attr_mask & IB_QP_DEST_QPN))
+ mlx5_set_path_udp_sport(path, ah,
+ qp->ibqp.qp_num,
+ attr->dest_qp_num);
path->dci_cfi_prio_sl = (sl & 0x7) << 4;
gid_type = ah->grh.sgid_attr->gid_type;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
--
2.25.2
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH rdma-next v2 5/6] RDMA/cma: Initialize the flow label of CM's route path record
2020-04-13 13:36 [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
` (3 preceding siblings ...)
2020-04-13 13:37 ` [PATCH rdma-next v2 4/6] RDMA/mlx5: Define RoCEv2 udp source port when set path Leon Romanovsky
@ 2020-04-13 13:37 ` Leon Romanovsky
2020-04-13 13:37 ` [PATCH rdma-next v2 6/6] RDMA/mlx5: Set UDP source port based on the grh.flow_label Leon Romanovsky
2020-04-19 13:02 ` [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
6 siblings, 0 replies; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-13 13:37 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma, Maor Gottlieb
From: Mark Zhang <markz@mellanox.com>
If the flow label is not set by the user, or the address family is not IPv6,
initialize it from the cma src/dst ports using Kernighan and Ritchie's hash
function.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/core/cma.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 26e6f7df247b..bc66a0d39068 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2904,6 +2904,24 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in6 *addr6;
+ u16 dport, sport;
+ u32 hash, fl;
+
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
+ hash = (u32)sport * 31 + dport;
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
+ }
+
+ return cpu_to_be32(fl);
+}
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2970,6 +2988,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
+ id_priv->id.port_num))
+ route->path_rec->flow_label =
+ cma_get_roce_udp_flow_label(id_priv);
+
cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
--
2.25.2
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH rdma-next v2 6/6] RDMA/mlx5: Set UDP source port based on the grh.flow_label
2020-04-13 13:36 [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
` (4 preceding siblings ...)
2020-04-13 13:37 ` [PATCH rdma-next v2 5/6] RDMA/cma: Initialize the flow label of CM's route path record Leon Romanovsky
@ 2020-04-13 13:37 ` Leon Romanovsky
2020-04-19 13:02 ` [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
6 siblings, 0 replies; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-13 13:37 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Mark Zhang, linux-rdma, Maor Gottlieb
From: Mark Zhang <markz@mellanox.com>
Calculate UDP source port based on the grh.flow_label. If grh.flow_label
is not valid, we will use minimal supported UDP source port.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/ah.c | 21 +++++++++++++++++++--
drivers/infiniband/hw/mlx5/main.c | 4 ++--
drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++--
3 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 14ad05e7c5bf..5acf1bfb73fe 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,6 +32,24 @@
#include "mlx5_ib.h"
+static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
+ const struct rdma_ah_attr *ah_attr)
+{
+ enum ib_gid_type gid_type = ah_attr->grh.sgid_attr->gid_type;
+ __be16 sport;
+
+ if ((gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) &&
+ (ah_attr->grh.flow_label & IB_GRH_FLOWLABEL_MASK))
+ sport = cpu_to_be16(
+ rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ else
+ sport = mlx5_get_roce_udp_sport_min(dev,
+ ah_attr->grh.sgid_attr);
+
+ return sport;
+}
+
static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
struct rdma_ah_attr *ah_attr)
{
@@ -59,8 +77,7 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
memcpy(ah->av.rmac, ah_attr->roce.dmac,
sizeof(ah_attr->roce.dmac));
- ah->av.udp_sport =
- mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr);
+ ah->av.udp_sport = mlx5_ah_get_udp_sport(dev, ah_attr);
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
#define MLX5_ECN_ENABLED BIT(1)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2db2309dde47..953c1d6b23aa 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -628,8 +628,8 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
attr->index, NULL, NULL);
}
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
- const struct ib_gid_attr *attr)
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr)
{
if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
return 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a7b5581a7a4d..61ea8fc70787 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1383,8 +1383,8 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
u64 guid, int type);
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
- const struct ib_gid_attr *attr);
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr);
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
--
2.25.2
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP
2020-04-13 13:36 [PATCH rdma-next v2 0/6] Set flow_label and RoCEv2 UDP source port for datagram QP Leon Romanovsky
` (5 preceding siblings ...)
2020-04-13 13:37 ` [PATCH rdma-next v2 6/6] RDMA/mlx5: Set UDP source port based on the grh.flow_label Leon Romanovsky
@ 2020-04-19 13:02 ` Leon Romanovsky
6 siblings, 0 replies; 10+ messages in thread
From: Leon Romanovsky @ 2020-04-19 13:02 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe
Cc: linux-rdma, Maor Gottlieb, Mark Zhang, netdev, Saeed Mahameed
On Mon, Apr 13, 2020 at 04:36:57PM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
>
> Changelog:
> v2: Dropped patch "RDMA/cm: Set flow label of recv_wc based on primary
> flow label", because it violates IBTA 13.5.4.3/13.5.4.4 sections.
> v1: Added extra patch to reduce amount of kzalloc/kfree calls in
> the HCA set capability flow.
> https://lore.kernel.org/lkml/20200322093031.918447-1-leon@kernel.org
> v0: https://lore.kernel.org/linux-rdma/20200318095300.45574-1-leon@kernel.org
> --------------------------------
>
> From Mark:
>
> This series provide flow label and UDP source port definition in RoCE v2.
> Those fields are used to create entropy for network routes (ECMP), load
> balancers and 802.3ad link aggregation switching that are not aware of
> RoCE headers.
>
> Thanks.
>
> Leon Romanovsky (1):
> net/mlx5: Refactor HCA capability set flow
>
> Mark Zhang (5):
> net/mlx5: Enable SW-defined RoCEv2 UDP source port
Those two patches were applied to the mlx5-next, please pull.
> RDMA/core: Add hash functions to calculate RoCEv2 flowlabel and UDP
> source port
> RDMA/mlx5: Define RoCEv2 udp source port when set path
> RDMA/cma: Initialize the flow label of CM's route path record
> RDMA/mlx5: Set UDP source port based on the grh.flow_label
Those patches need to go to the RDMA repo.
Thanks
^ permalink raw reply [flat|nested] 10+ messages in thread