Linux RDMA and InfiniBand development

Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed

* RE: [PATCH rdma-next] RDMA/core: Annotate destroy of mutex to ensure that it is released as unlocked
From: Parav Pandit @ 2019-07-09  5:25 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky
  Cc: Doug Ledford, RDMA mailing list, Leon Romanovsky
In-Reply-To: <20190708200114.GA25699@ziepe.ca>



> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Tuesday, July 9, 2019 1:31 AM
> To: Leon Romanovsky <leon@kernel.org>
> Cc: Doug Ledford <dledford@redhat.com>; Parav Pandit
> <parav@mellanox.com>; RDMA mailing list <linux-rdma@vger.kernel.org>;
> Leon Romanovsky <leonro@mellanox.com>
> Subject: Re: [PATCH rdma-next] RDMA/core: Annotate destroy of mutex to
> ensure that it is released as unlocked
> 
> On Thu, Jul 04, 2019 at 04:00:12PM +0300, Leon Romanovsky wrote:
> > From: Parav Pandit <parav@mellanox.com>
> >
> > While compiled with CONFIG_DEBUG_MUTEXES, the kernel ensures that
> > mutex is not held during destroy.
> > Hence add mutex_destroy() for mutexes used in RDMA modules.
> >
> > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> >  drivers/infiniband/core/cache.c        | 1 +
> >  drivers/infiniband/core/cma_configfs.c | 1 +
> >  drivers/infiniband/core/device.c       | 3 +++
> >  drivers/infiniband/core/user_mad.c     | 2 +-
> >  drivers/infiniband/core/uverbs_main.c  | 2 ++
> >  drivers/infiniband/core/verbs.c        | 1 +
> >  6 files changed, 9 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/infiniband/core/cache.c
> > b/drivers/infiniband/core/cache.c index 18e476b3ced0..00fb3eacda19
> > 100644
> > +++ b/drivers/infiniband/core/cache.c
> > @@ -810,6 +810,7 @@ static void release_gid_table(struct ib_device
> *device,
> >  	if (leak)
> >  		return;
> >
> > +	mutex_destroy(&table->lock);
> >  	kfree(table->data_vec);
> >  	kfree(table);
> >  }
> > diff --git a/drivers/infiniband/core/cma_configfs.c
> > b/drivers/infiniband/core/cma_configfs.c
> > index 3ec2c415bb70..0a7b5eba2fc0 100644
> > +++ b/drivers/infiniband/core/cma_configfs.c
> > @@ -350,4 +350,5 @@ int __init cma_configfs_init(void)  void __exit
> > cma_configfs_exit(void)  {
> >  	configfs_unregister_subsystem(&cma_subsys);
> > +	mutex_destroy(&cma_subsys.su_mutex);
> >  }
> 
> There is a missing mutex_destroy in cma_configfs_init's error path.
> 
Ack. Adding it.

> > diff --git a/drivers/infiniband/core/device.c
> > b/drivers/infiniband/core/device.c
> > index 7f4affe8a10d..adf8d93bb42d 100644
> > +++ b/drivers/infiniband/core/device.c
> > @@ -508,6 +508,9 @@ static void ib_device_release(struct device *device)
> >  			  rcu_head);
> >  	}
> >
> > +	mutex_destroy(&dev->unregistration_lock);
> > +	mutex_destroy(&dev->compat_devs_mutex);
> > +
> >  	xa_destroy(&dev->compat_devs);
> >  	xa_destroy(&dev->client_data);
> >  	kfree_rcu(dev, rcu_head);
> > diff --git a/drivers/infiniband/core/user_mad.c
> > b/drivers/infiniband/core/user_mad.c
> > index 9f8a48016b41..e0512aef033c 100644
> > +++ b/drivers/infiniband/core/user_mad.c
> > @@ -1038,7 +1038,7 @@ static int ib_umad_close(struct inode *inode, struct
> file *filp)
> >  				ib_unregister_mad_agent(file->agent[i]);
> >
> >  	mutex_unlock(&file->port->file_mutex);
> > -
> > +	mutex_destroy(&file->mutex);
> >  	kfree(file);
> 
> The file->port->file_mutex is missing a destroy in ib_umad_dev_free (bit tricky
> to do)
> 
I will leave this for now and address in second iteration.

> >  	return 0;
> >  }
> > diff --git a/drivers/infiniband/core/uverbs_main.c
> > b/drivers/infiniband/core/uverbs_main.c
> > index 11c13c1381cf..4827aa3415ff 100644
> > +++ b/drivers/infiniband/core/uverbs_main.c
> > @@ -120,6 +120,8 @@ static void ib_uverbs_release_dev(struct device
> > *device)
> >
> >  	uverbs_destroy_api(dev->uapi);
> >  	cleanup_srcu_struct(&dev->disassociate_srcu);
> > +	mutex_destroy(&dev->lists_mutex);
> > +	mutex_destroy(&dev->xrcd_tree_mutex);
> 
> This file also has ucontext_lock and umap_lock that are missing destroy
> 
Ack. Adding it.

^ permalink raw reply

* [PATCH] net/mlx5e: Return in default case statement in tx_post_resync_params
From: Nathan Chancellor @ 2019-07-08 23:11 UTC (permalink / raw)
  To: Saeed Mahameed, Leon Romanovsky
  Cc: David S. Miller, Boris Pismenny, netdev, linux-rdma, linux-kernel,
	clang-built-linux, Nathan Chancellor

clang warns:

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c:251:2:
warning: variable 'rec_seq_sz' is used uninitialized whenever switch
default is taken [-Wsometimes-uninitialized]
        default:
        ^~~~~~~
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c:255:46: note:
uninitialized use occurs here
        skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
                                                    ^~~~~~~~~~
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c:239:16: note:
initialize the variable 'rec_seq_sz' to silence this warning
        u16 rec_seq_sz;
                      ^
                       = 0
1 warning generated.

This case statement was clearly designed to be one that should not be
hit during runtime because of the WARN_ON statement so just return early
to prevent copying uninitialized memory up into rn_be.

Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support")
Link: https://github.com/ClangBuiltLinux/linux/issues/590
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 3f5f4317a22b..5c08891806f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -250,6 +250,7 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
 	}
 	default:
 		WARN_ON(1);
+		return;
 	}
 
 	skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz);
-- 
2.22.0


^ permalink raw reply related

* Re: [PATCH v3] RDMA/core: Fix race when resolving IP address
From: Jason Gunthorpe @ 2019-07-08 23:05 UTC (permalink / raw)
  To: Dag Moxnes
  Cc: Mark Bloch, dledford@redhat.com, leon@kernel.org, Parav Pandit,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <424b75d7-90ea-8d07-42b9-de9d507b0f85@oracle.com>

On Mon, Jul 08, 2019 at 10:11:29PM +0200, Dag Moxnes wrote:
> 
> 
> Den 08.07.2019 21:38, skrev Jason Gunthorpe:
> > On Mon, Jul 08, 2019 at 07:22:45PM +0000, Mark Bloch wrote:
> > > 
> > > On 7/8/19 11:47 AM, Dag Moxnes wrote:
> > > > Thanks Jason,
> > > > 
> > > > Regards,
> > > > Dag
> > > > 
> > > > Den 08.07.2019 19:50, skrev Jason Gunthorpe:
> > > > > On Mon, Jul 08, 2019 at 01:16:24PM +0200, Dag Moxnes wrote:
> > > > > > Use neighbour lock when copying MAC address from neighbour data struct
> > > > > > in dst_fetch_ha.
> > > > > > 
> > > > > > When not using the lock, it is possible for the function to race with
> > > > > > neigh_update, causing it to copy an invalid MAC address.
> > > > > > 
> > > > > > It is possible to provoke this error by calling rdma_resolve_addr in a
> > > > > > tight loop, while deleting the corresponding ARP entry in another tight
> > > > > > loop.
> > > > > > 
> > > > > > This will cause the race shown in the following sample trace:
> > > > > > 
> > > > > > rdma_resolve_addr()
> > > > > >     rdma_resolve_ip()
> > > > > >       addr_resolve()
> > > > > >         addr_resolve_neigh()
> > > > > >           fetch_ha()
> > > > > >             dst_fetch_ha()
> > > > > >               n->nud_state == NUD_VALID
> > > > > It isn't nud_state that is the problem here, it is the parallel
> > > > > memcpy's onto ha. I fixed the commit message
> > > > > 
> > > > > This could also have been solved by using the ha_lock, but I don't
> > > > > think we have a reason to particularly over-optimize this.
> > > Sorry I'm late to the party, but why not just use: neigh_ha_snapshot()?
> > Yes, that is much better, please respin this
> OK, will do!
> Can I still post it as a v4? Or should I do it differently as you already
> applied it?

post a v4

Jason

^ permalink raw reply

* Re: [PATCH rdma-next 0/2] Allow netlink commands in non init_net net namespace
From: Jason Gunthorpe @ 2019-07-08 20:21 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Leon Romanovsky, RDMA mailing list, Parav Pandit,
	Steve Wise
In-Reply-To: <20190704130402.8431-1-leon@kernel.org>

On Thu, Jul 04, 2019 at 04:04:00PM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
> 
> Now that RDMA devices can be attached to specific net namespace,
> allow netlink commands in non init_net namespace.
> 
> Parav Pandit (2):
>   IB/core: Work on the caller socket net namespace in
>   nldev_newlink()

I applied the first one to for-next

>   IB: Support netlink commands in non init_net net namespaces

The second needs a resend

Thanks,
Jason

^ permalink raw reply

* Re: [PATCH rdma-next 2/2] IB: Support netlink commands in non init_net net namespaces
From: Jason Gunthorpe @ 2019-07-08 20:20 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Leon Romanovsky, RDMA mailing list, Parav Pandit,
	Steve Wise
In-Reply-To: <20190704130402.8431-3-leon@kernel.org>

On Thu, Jul 04, 2019 at 04:04:02PM +0300, Leon Romanovsky wrote:
> -int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
> +int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
>  {
> +	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);

This should be a proper type safe accessor in all places

> -void rdma_nl_exit(void)
> +void rdma_nl_net_exit(struct rdma_dev_net *rnet)
>  {
> -	int idx;
> -
> -	for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
> -		rdma_nl_unregister(idx);

There should be a WARN_ON during the module unload that no NL clients
are still registered

Thanks,
Jason

^ permalink raw reply

* Re: [PATCH v3] RDMA/core: Fix race when resolving IP address
From: Dag Moxnes @ 2019-07-08 20:11 UTC (permalink / raw)
  To: Jason Gunthorpe, Mark Bloch
  Cc: dledford@redhat.com, leon@kernel.org, Parav Pandit,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20190708193814.GF23996@ziepe.ca>



Den 08.07.2019 21:38, skrev Jason Gunthorpe:
> On Mon, Jul 08, 2019 at 07:22:45PM +0000, Mark Bloch wrote:
>>
>> On 7/8/19 11:47 AM, Dag Moxnes wrote:
>>> Thanks Jason,
>>>
>>> Regards,
>>> Dag
>>>
>>> Den 08.07.2019 19:50, skrev Jason Gunthorpe:
>>>> On Mon, Jul 08, 2019 at 01:16:24PM +0200, Dag Moxnes wrote:
>>>>> Use neighbour lock when copying MAC address from neighbour data struct
>>>>> in dst_fetch_ha.
>>>>>
>>>>> When not using the lock, it is possible for the function to race with
>>>>> neigh_update, causing it to copy an invalid MAC address.
>>>>>
>>>>> It is possible to provoke this error by calling rdma_resolve_addr in a
>>>>> tight loop, while deleting the corresponding ARP entry in another tight
>>>>> loop.
>>>>>
>>>>> This will cause the race shown in the following sample trace:
>>>>>
>>>>> rdma_resolve_addr()
>>>>>     rdma_resolve_ip()
>>>>>       addr_resolve()
>>>>>         addr_resolve_neigh()
>>>>>           fetch_ha()
>>>>>             dst_fetch_ha()
>>>>>               n->nud_state == NUD_VALID
>>>> It isn't nud_state that is the problem here, it is the parallel
>>>> memcpy's onto ha. I fixed the commit message
>>>>
>>>> This could also have been solved by using the ha_lock, but I don't
>>>> think we have a reason to particularly over-optimize this.
>> Sorry I'm late to the party, but why not just use: neigh_ha_snapshot()?
> Yes, that is much better, please respin this
OK, will do!
Can I still post it as a v4? Or should I do it differently as you 
already applied it?

Regards,
-Dag
>
> Thanks,
> Jason


^ permalink raw reply

* Re: [PATCH rdma-next] RDMA/core: Annotate destroy of mutex to ensure that it is released as unlocked
From: Jason Gunthorpe @ 2019-07-08 20:01 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Parav Pandit, RDMA mailing list, Leon Romanovsky
In-Reply-To: <20190704130012.8177-1-leon@kernel.org>

On Thu, Jul 04, 2019 at 04:00:12PM +0300, Leon Romanovsky wrote:
> From: Parav Pandit <parav@mellanox.com>
> 
> While compiled with CONFIG_DEBUG_MUTEXES, the kernel ensures that mutex
> is not held during destroy.
> Hence add mutex_destroy() for mutexes used in RDMA modules.
> 
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>  drivers/infiniband/core/cache.c        | 1 +
>  drivers/infiniband/core/cma_configfs.c | 1 +
>  drivers/infiniband/core/device.c       | 3 +++
>  drivers/infiniband/core/user_mad.c     | 2 +-
>  drivers/infiniband/core/uverbs_main.c  | 2 ++
>  drivers/infiniband/core/verbs.c        | 1 +
>  6 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> index 18e476b3ced0..00fb3eacda19 100644
> +++ b/drivers/infiniband/core/cache.c
> @@ -810,6 +810,7 @@ static void release_gid_table(struct ib_device *device,
>  	if (leak)
>  		return;
>  
> +	mutex_destroy(&table->lock);
>  	kfree(table->data_vec);
>  	kfree(table);
>  }
> diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
> index 3ec2c415bb70..0a7b5eba2fc0 100644
> +++ b/drivers/infiniband/core/cma_configfs.c
> @@ -350,4 +350,5 @@ int __init cma_configfs_init(void)
>  void __exit cma_configfs_exit(void)
>  {
>  	configfs_unregister_subsystem(&cma_subsys);
> +	mutex_destroy(&cma_subsys.su_mutex);
>  }

There is a missing mutex_destroy in cma_configfs_init's error path.

> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 7f4affe8a10d..adf8d93bb42d 100644
> +++ b/drivers/infiniband/core/device.c
> @@ -508,6 +508,9 @@ static void ib_device_release(struct device *device)
>  			  rcu_head);
>  	}
>  
> +	mutex_destroy(&dev->unregistration_lock);
> +	mutex_destroy(&dev->compat_devs_mutex);
> +
>  	xa_destroy(&dev->compat_devs);
>  	xa_destroy(&dev->client_data);
>  	kfree_rcu(dev, rcu_head);
> diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
> index 9f8a48016b41..e0512aef033c 100644
> +++ b/drivers/infiniband/core/user_mad.c
> @@ -1038,7 +1038,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
>  				ib_unregister_mad_agent(file->agent[i]);
>  
>  	mutex_unlock(&file->port->file_mutex);
> -
> +	mutex_destroy(&file->mutex);
>  	kfree(file);

The file->port->file_mutex is missing a destroy in ib_umad_dev_free
(bit tricky to do)

>  	return 0;
>  }
> diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
> index 11c13c1381cf..4827aa3415ff 100644
> +++ b/drivers/infiniband/core/uverbs_main.c
> @@ -120,6 +120,8 @@ static void ib_uverbs_release_dev(struct device *device)
>  
>  	uverbs_destroy_api(dev->uapi);
>  	cleanup_srcu_struct(&dev->disassociate_srcu);
> +	mutex_destroy(&dev->lists_mutex);
> +	mutex_destroy(&dev->xrcd_tree_mutex);

This file also has ucontext_lock and umap_lock that are missing
destroy

Jason

^ permalink raw reply

* Re: [PATCH] Make rxe driver to calculate correct byte_len on receiving side when work completion is generated with IB_WC_RECV_RDMA_WITH_IMM opcode.
From: Jason Gunthorpe @ 2019-07-08 19:51 UTC (permalink / raw)
  To: Konstantin Taranov; +Cc: monis, linux-rdma
In-Reply-To: <20190627140643.6191-1-konstantin.taranov@inf.ethz.ch>

On Thu, Jun 27, 2019 at 04:06:43PM +0200, Konstantin Taranov wrote:
> Make softRoce to calculate correct byte_len on receiving side when work completion
> is generated with IB_WC_RECV_RDMA_WITH_IMM opcode.
> 
> According to documentation byte_len must indicate the number of written
> bytes, whereas it was always equal to zero for IB_WC_RECV_RDMA_WITH_IMM opcode.
> 
> The patch proposes to remember the length of an RDMA request from the RETH header, and use it 
> as byte_len when the work completion with IB_WC_RECV_RDMA_WITH_IMM opcode is generated.
> 
> Signed-off-by: Konstantin Taranov <konstantin.taranov@inf.ethz.ch>
> ---
>  drivers/infiniband/sw/rxe/rxe_resp.c  | 5 ++++-
>  drivers/infiniband/sw/rxe/rxe_verbs.h | 1 +
>  2 files changed, 5 insertions(+), 1 deletion(-)

Applied to for-next, thanks

Jason

^ permalink raw reply

* Re: [PATCH v3] RDMA/core: Fix race when resolving IP address
From: Jason Gunthorpe @ 2019-07-08 19:38 UTC (permalink / raw)
  To: Mark Bloch
  Cc: Dag Moxnes, dledford@redhat.com, leon@kernel.org, Parav Pandit,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <63b9d2cb-f69c-d77c-7803-f08e2a6f755d@mellanox.com>

On Mon, Jul 08, 2019 at 07:22:45PM +0000, Mark Bloch wrote:
> 
> 
> On 7/8/19 11:47 AM, Dag Moxnes wrote:
> > Thanks Jason,
> > 
> > Regards,
> > Dag
> > 
> > Den 08.07.2019 19:50, skrev Jason Gunthorpe:
> >> On Mon, Jul 08, 2019 at 01:16:24PM +0200, Dag Moxnes wrote:
> >>> Use neighbour lock when copying MAC address from neighbour data struct
> >>> in dst_fetch_ha.
> >>>
> >>> When not using the lock, it is possible for the function to race with
> >>> neigh_update, causing it to copy an invalid MAC address.
> >>>
> >>> It is possible to provoke this error by calling rdma_resolve_addr in a
> >>> tight loop, while deleting the corresponding ARP entry in another tight
> >>> loop.
> >>>
> >>> This will cause the race shown in the following sample trace:
> >>>
> >>> rdma_resolve_addr()
> >>>    rdma_resolve_ip()
> >>>      addr_resolve()
> >>>        addr_resolve_neigh()
> >>>          fetch_ha()
> >>>            dst_fetch_ha()
> >>>              n->nud_state == NUD_VALID
> >> It isn't nud_state that is the problem here, it is the parallel
> >> memcpy's onto ha. I fixed the commit message
> >>
> >> This could also have been solved by using the ha_lock, but I don't
> >> think we have a reason to particularly over-optimize this.
> 
> Sorry I'm late to the party, but why not just use: neigh_ha_snapshot()?

Yes, that is much better, please respin this

Thanks,
Jason

^ permalink raw reply

* Re: [PATCH v3] RDMA/core: Fix race when resolving IP address
From: Mark Bloch @ 2019-07-08 19:22 UTC (permalink / raw)
  To: Dag Moxnes, Jason Gunthorpe
  Cc: dledford@redhat.com, leon@kernel.org, Parav Pandit,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <4b9ae7b8-310c-e0b6-7a8e-33e6d5bef83d@oracle.com>



On 7/8/19 11:47 AM, Dag Moxnes wrote:
> Thanks Jason,
> 
> Regards,
> Dag
> 
> Den 08.07.2019 19:50, skrev Jason Gunthorpe:
>> On Mon, Jul 08, 2019 at 01:16:24PM +0200, Dag Moxnes wrote:
>>> Use neighbour lock when copying MAC address from neighbour data struct
>>> in dst_fetch_ha.
>>>
>>> When not using the lock, it is possible for the function to race with
>>> neigh_update, causing it to copy an invalid MAC address.
>>>
>>> It is possible to provoke this error by calling rdma_resolve_addr in a
>>> tight loop, while deleting the corresponding ARP entry in another tight
>>> loop.
>>>
>>> This will cause the race shown in the following sample trace:
>>>
>>> rdma_resolve_addr()
>>>    rdma_resolve_ip()
>>>      addr_resolve()
>>>        addr_resolve_neigh()
>>>          fetch_ha()
>>>            dst_fetch_ha()
>>>              n->nud_state == NUD_VALID
>> It isn't nud_state that is the problem here, it is the parallel
>> memcpy's onto ha. I fixed the commit message
>>
>> This could also have been solved by using the ha_lock, but I don't
>> think we have a reason to particularly over-optimize this.

Sorry I'm late to the party, but why not just use: neigh_ha_snapshot()?

>>
>>>   drivers/infiniband/core/addr.c | 9 ++++++---
>>>   1 file changed, 6 insertions(+), 3 deletions(-)
>> Applied to for-next, thanks
>>
>> Jason
> 

Mark

^ permalink raw reply

* Re: [PATCH rdma-next v5 0/4] Use RDMA adaptive moderation library
From: Jason Gunthorpe @ 2019-07-08 19:02 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Leon Romanovsky, RDMA mailing list, Max Gurtovoy,
	Saeed Mahameed, Sagi Grimberg, Yamin Friedman
In-Reply-To: <20190708105905.27468-1-leon@kernel.org>

On Mon, Jul 08, 2019 at 01:59:01PM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
> 
> Hi,
> 
> This is RDMA part of previously sent DIM library improvements series
> [1], which was pulled by Dave. It needs to be pulled to RDMA too as
> a prerequisite.
> 
> Changes since v4:
>  * Separated mlx5 change from IB/core changes.
> 
> Changes since v3:
>  * Renamed dim_owner to be priv
>  * Added Sagi's ROBs
>  * Removed casting of void pointer.
> 
> Changes since v2:
> - renamed user-space knob from dim to adaptive-moderation (Sagi)
> - some minor code clean ups (Sagi)
> - Reordered patches to ensure that netlink expose is last in the series.
> - Slightly cleaned commit messages
> - Changed "bool use_cq_dim" flag to be bitwise to save space.
> 
> Changes since v1:
> - added per ib device configuration knob for rdma-dim (Sagi)
> - add NL directives for user-space / rdma tool to configure rdma dim
>   (Sagi/Leon)
> - use one header file for DIM implementations (Leon)
> - various point changes in the rdma dim related code in the IB core
>   (Leon)
> - removed the RDMA specific patches from this pull request
> 
> Thanks
> 
> [1] https://www.spinics.net/lists/netdev/msg581046.html
> 
> Leon Romanovsky (1):
>   RDMA/mlx5: Set RDMA DIM to be enabled by default
> 
> Yamin Friedman (3):
>   linux/dim: Implement RDMA adaptive moderation (DIM)
>   RDMA/core: Provide RDMA DIM support for ULPs
>   RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation
>     to netlink

Applied to for-next

Thanks,
Jason

^ permalink raw reply

* Re: [PATCH v3] RDMA/core: Fix race when resolving IP address
From: Dag Moxnes @ 2019-07-08 18:47 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: dledford, leon, parav, linux-rdma, linux-kernel
In-Reply-To: <20190708175025.GA6976@ziepe.ca>

Thanks Jason,

Regards,
Dag

Den 08.07.2019 19:50, skrev Jason Gunthorpe:
> On Mon, Jul 08, 2019 at 01:16:24PM +0200, Dag Moxnes wrote:
>> Use neighbour lock when copying MAC address from neighbour data struct
>> in dst_fetch_ha.
>>
>> When not using the lock, it is possible for the function to race with
>> neigh_update, causing it to copy an invalid MAC address.
>>
>> It is possible to provoke this error by calling rdma_resolve_addr in a
>> tight loop, while deleting the corresponding ARP entry in another tight
>> loop.
>>
>> This will cause the race shown in the following sample trace:
>>
>> rdma_resolve_addr()
>>    rdma_resolve_ip()
>>      addr_resolve()
>>        addr_resolve_neigh()
>>          fetch_ha()
>>            dst_fetch_ha()
>>              n->nud_state == NUD_VALID
> It isn't nud_state that is the problem here, it is the parallel
> memcpy's onto ha. I fixed the commit message
>
> This could also have been solved by using the ha_lock, but I don't
> think we have a reason to particularly over-optimize this.
>
>>   drivers/infiniband/core/addr.c | 9 ++++++---
>>   1 file changed, 6 insertions(+), 3 deletions(-)
> Applied to for-next, thanks
>
> Jason


^ permalink raw reply

* Re: use exact allocation for dma coherent memory
From: Christoph Hellwig @ 2019-07-08 18:43 UTC (permalink / raw)
  To: Arend Van Spriel
  Cc: Christoph Hellwig, Maarten Lankhorst, Maxime Ripard, Sean Paul,
	David Airlie, Daniel Vetter, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Ian Abbott, H Hartley Sweeten, devel, linux-s390,
	Intel Linux Wireless, linux-rdma, netdev, intel-gfx,
	linux-wireless, linux-kernel, dri-devel, linux-mm, iommu,
	moderated list:ARM PORT, linux-media
In-Reply-To: <74eb9d99-6aa6-d1ad-e66d-6cc9c496b2f3@broadcom.com>

On Tue, Jul 02, 2019 at 11:48:44AM +0200, Arend Van Spriel wrote:
> You made me look ;-) Actually not touching my drivers so I'm off the hook. 
> However, I was wondering if drivers could know so I decided to look into 
> the DMA-API.txt documentation which currently states:
>
> """
> The flag parameter (dma_alloc_coherent() only) allows the caller to
> specify the ``GFP_`` flags (see kmalloc()) for the allocation (the
> implementation may choose to ignore flags that affect the location of
> the returned memory, like GFP_DMA).
> """
>
> I do expect you are going to change that description as well now that you 
> are going to issue a warning on __GFP_COMP. Maybe include that in patch 
> 15/16 where you introduce that warning.

Yes, that description needs an update, even without this series.
I'll make sure it is more clear.

^ permalink raw reply

* Re: [PATCH v3] RDMA/core: Fix race when resolving IP address
From: Jason Gunthorpe @ 2019-07-08 17:50 UTC (permalink / raw)
  To: Dag Moxnes; +Cc: dledford, leon, parav, linux-rdma, linux-kernel
In-Reply-To: <1562584584-13132-1-git-send-email-dag.moxnes@oracle.com>

On Mon, Jul 08, 2019 at 01:16:24PM +0200, Dag Moxnes wrote:
> Use neighbour lock when copying MAC address from neighbour data struct
> in dst_fetch_ha.
> 
> When not using the lock, it is possible for the function to race with
> neigh_update, causing it to copy an invalid MAC address.
> 
> It is possible to provoke this error by calling rdma_resolve_addr in a
> tight loop, while deleting the corresponding ARP entry in another tight
> loop.
> 
> This will cause the race shown in the following sample trace:
> 
> rdma_resolve_addr()
>   rdma_resolve_ip()
>     addr_resolve()
>       addr_resolve_neigh()
>         fetch_ha()
>           dst_fetch_ha()
>             n->nud_state == NUD_VALID

It isn't nud_state that is the problem here, it is the parallel
memcpy's onto ha. I fixed the commit message

This could also have been solved by using the ha_lock, but I don't
think we have a reason to particularly over-optimize this.

>  drivers/infiniband/core/addr.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)

Applied to for-next, thanks

Jason

^ permalink raw reply

* Re: [PATCH rdma-next] IB/mlx5: Report correctly tag matching rendezvous capability
From: Jason Gunthorpe @ 2019-07-08 17:39 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Danit Goldberg, RDMA mailing list, Artemy Kovalyov,
	Yishai Hadas, Leon Romanovsky
In-Reply-To: <20190705162157.17336-1-leon@kernel.org>

On Fri, Jul 05, 2019 at 07:21:57PM +0300, Leon Romanovsky wrote:
> From: Danit Goldberg <danitg@mellanox.com>
> 
> Tag matching with rendezvous offload for RC transport is controlled
> by FW and before this change, it was advertised to user as supported
> without any relation to FW.
> 
> Separate tag matching for rendezvous and eager protocols, so users
> will see real capabilities.
> 
> Cc: <stable@vger.kernel.org> # 4.13
> Fixes: eb761894351d ("IB/mlx5: Fill XRQ capabilities")
> Signed-off-by: Danit Goldberg <danitg@mellanox.com>
> Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
> Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> ---
>  drivers/infiniband/hw/mlx5/main.c | 8 ++++++--
>  include/rdma/ib_verbs.h           | 4 ++--
>  2 files changed, 8 insertions(+), 4 deletions(-)

This is a bit late, but since this is for stable, applied to for-next, thanks
 
Jason

^ permalink raw reply

* Re: [PATCH rdma-next] IB/mlx5: Report correctly tag matching rendezvous capability
From: Jason Gunthorpe @ 2019-07-08 17:26 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Danit Goldberg, RDMA mailing list, Artemy Kovalyov,
	Yishai Hadas
In-Reply-To: <20190706163523.GA18182@mtr-leonro.mtl.com>

On Sat, Jul 06, 2019 at 07:35:23PM +0300, Leon Romanovsky wrote:
> > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> > > index 30eb68f36109..c5f8a9f17063 100644
> > > +++ b/include/rdma/ib_verbs.h
> > > @@ -308,8 +308,8 @@ struct ib_rss_caps {
> > >  };
> > >
> > >  enum ib_tm_cap_flags {
> > > -	/*  Support tag matching on RC transport */
> > > -	IB_TM_CAP_RC		    = 1 << 0,
> > > +	/*  Support tag matching with rendezvous offload for RC transport */
> > > +	IB_TM_CAP_RNDV_RC = 1 << 0,
> > >  };
> >
> > This is in the wrong header, right?
> 
> It predates our all-to-uapi headers approach and moving to UAPI this struct
> is definitely too much for a fix which should go to stable@.

Well, let's send a patch to fix it please

Jason

^ permalink raw reply

* Re: [PATCH 35/39] docs: infiniband: add it to the driver-api bookset
From: Jason Gunthorpe @ 2019-07-08 17:25 UTC (permalink / raw)
  To: Mauro Carvalho Chehab
  Cc: Linux Doc Mailing List, Mauro Carvalho Chehab, linux-kernel,
	Jonathan Corbet, Doug Ledford, linux-rdma
In-Reply-To: <20190706081950.4a629537@coco.lan>

On Sat, Jul 06, 2019 at 08:19:50AM -0300, Mauro Carvalho Chehab wrote:
> Em Wed, 3 Jul 2019 15:08:02 -0300
> Jason Gunthorpe <jgg@ziepe.ca> escreveu:
> 
> > On Fri, Jun 28, 2019 at 09:30:28AM -0300, Mauro Carvalho Chehab wrote:
> > > While this contains some uAPI stuff, it was intended to be
> > > read by a kernel doc. So, let's not move it to a different
> > > dir, but, instead, just add it to the driver-api bookset.
> > > 
> > > Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
> > >  Documentation/index.rst            | 1 +
> > >  Documentation/infiniband/index.rst | 2 +-
> > >  2 files changed, 2 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/Documentation/index.rst b/Documentation/index.rst
> > > index ea33cbbccd9d..e69d2fde7735 100644
> > > +++ b/Documentation/index.rst
> > > @@ -96,6 +96,7 @@ needed).
> > >     block/index
> > >     hid/index
> > >     iio/index
> > > +   infiniband/index
> > >     leds/index
> > >     media/index
> > >     networking/index
> > > diff --git a/Documentation/infiniband/index.rst b/Documentation/infiniband/index.rst
> > > index 22eea64de722..9cd7615438b9 100644
> > > +++ b/Documentation/infiniband/index.rst
> > > @@ -1,4 +1,4 @@
> > > -:orphan:
> > > +.. SPDX-License-Identifier: GPL-2.0
> > >  
> > >  ==========
> > >  InfiniBand  
> > 
> > Should this one go to the rdma.git as well? It looks like yes
> 
> I'm OK if you want to add to rdma.git. However, this will likely raise 
> conflicts, though, as this series has lots of other patches touching
> Documentation/index.rst. 

Applied now, it seems better to keep everything consistent

Jason

^ permalink raw reply

* Re: [PATCH rdma-next 0/2] DEVX VHCA tunnel support
From: Jason Gunthorpe @ 2019-07-08 16:58 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Leon Romanovsky, RDMA mailing list, Max Gurtovoy,
	Yishai Hadas, Saeed Mahameed, linux-netdev
In-Reply-To: <20190701181402.25286-1-leon@kernel.org>

On Mon, Jul 01, 2019 at 09:14:00PM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
> 
> Hi,
> 
> Those two patches introduce VHCA tunnel mechanism to DEVX interface
> needed for Bluefield SOC. See extensive commit messages for more
> information.
> 
> Thanks
> 
> Max Gurtovoy (2):
>   net/mlx5: Introduce VHCA tunnel device capability
>   IB/mlx5: Implement VHCA tunnel mechanism in DEVX

Thanks, applied to for-next

Jason

^ permalink raw reply

* Re: [PATCH] [net-next] net/mlx5e: xsk: dynamically allocate mlx5e_channel_param
From: Maxim Mikityanskiy @ 2019-07-08 15:16 UTC (permalink / raw)
  To: Arnd Bergmann, Saeed Mahameed, David S. Miller,
	Alexei Starovoitov, Tariq Toukan
  Cc: Leon Romanovsky, Daniel Borkmann, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org,
	xdp-newbies@vger.kernel.org, bpf@vger.kernel.org
In-Reply-To: <20190708125554.3863901-1-arnd@arndb.de>

On 2019-07-08 15:55, Arnd Bergmann wrote:
> The structure is too large to put on the stack, resulting in a
> warning on 32-bit ARM:
> 
> drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c:59:5: error: stack frame size of 1344 bytes in function
>        'mlx5e_open_xsk' [-Werror,-Wframe-larger-than=]
> 
> Use kzalloc() instead.
> 
> Fixes: a038e9794541 ("net/mlx5e: Add XSK zero-copy support")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>   .../mellanox/mlx5/core/en/xsk/setup.c         | 25 ++++++++++++-------
>   1 file changed, 16 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
> index aaffa6f68dc0..db9bbec68dbf 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
> @@ -60,24 +60,28 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
>   		   struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
>   		   struct mlx5e_channel *c)
>   {
> -	struct mlx5e_channel_param cparam = {};
> +	struct mlx5e_channel_param *cparam;
>   	struct dim_cq_moder icocq_moder = {};
>   	int err;
>   
>   	if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
>   		return -EINVAL;
>   
> -	mlx5e_build_xsk_cparam(priv, params, xsk, &cparam);
> +	cparam = kzalloc(sizeof(*cparam), GFP_KERNEL);

Similar code in mlx5e_open_channels (en_main.c) uses kvzalloc. Although 
the struct is currently smaller than a page anyway, and there should be 
no difference in behavior now, I suggest using the same alloc function 
to keep code uniform.

> +	if (!cparam)
> +		return -ENOMEM;
>   
> -	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq);
> +	mlx5e_build_xsk_cparam(priv, params, xsk, cparam);
> +
> +	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq);
>   	if (unlikely(err))
> -		return err;
> +		goto err_kfree_cparam;
>   
> -	err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq);
> +	err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq);
>   	if (unlikely(err))
>   		goto err_close_rx_cq;
>   
> -	err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
> +	err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq);
>   	if (unlikely(err))
>   		goto err_close_rq;
>   
> @@ -87,18 +91,18 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
>   	 * is disabled and then reenabled, but the SQ continues receiving CQEs
>   	 * from the old UMEM.
>   	 */
> -	err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
> +	err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true);
>   	if (unlikely(err))
>   		goto err_close_tx_cq;
>   
> -	err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
> +	err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq);
>   	if (unlikely(err))
>   		goto err_close_sq;
>   
>   	/* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
>   	 * triggered and NAPI to be called on the correct CPU.
>   	 */
> -	err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
> +	err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq);
>   	if (unlikely(err))
>   		goto err_close_icocq;
>   

A kfree is missing here. It's a memory leak in the good path.

Thanks!

> @@ -123,6 +127,9 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
>   err_close_rx_cq:
>   	mlx5e_close_cq(&c->xskrq.cq);
>   
> +err_kfree_cparam:
> +	kfree(cparam);
> +
>   	return err;
>   }
>   
> 


^ permalink raw reply

* Re:  Re: Re: linux-next: build failure after merge of the rdma tree
From: Bernard Metzler @ 2019-07-08 15:08 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Stephen Rothwell, Doug Ledford, Linux Next Mailing List,
	Linux Kernel Mailing List, linux-rdma@vger.kernel.org
In-Reply-To: <20190708145630.GG23966@mellanox.com>

-----"Jason Gunthorpe" <jgg@mellanox.com> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>
>From: "Jason Gunthorpe" <jgg@mellanox.com>
>Date: 07/08/2019 04:56PM
>Cc: "Stephen Rothwell" <sfr@canb.auug.org.au>, "Doug Ledford"
><dledford@redhat.com>, "Linux Next Mailing List"
><linux-next@vger.kernel.org>, "Linux Kernel Mailing List"
><linux-kernel@vger.kernel.org>, "linux-rdma@vger.kernel.org"
><linux-rdma@vger.kernel.org>
>Subject: [EXTERNAL] Re: Re: linux-next: build failure after merge of
>the rdma tree
>
>On Mon, Jul 08, 2019 at 02:28:13PM +0000, Bernard Metzler wrote:
>
>> Thanks for bringing this up. Indeed, that explicit
>> initialization seems to be inappropriate. Can you please
>> fix that as you suggest?
>
>I'm applying this to fix the PER_CPU stuff in siw:
>
>From 4c7d6dcd364843e408a60952ba914bb72bafc6cc Mon Sep 17 00:00:00
>2001
>From: Jason Gunthorpe <jgg@mellanox.com>
>Date: Mon, 8 Jul 2019 11:36:32 -0300
>Subject: [PATCH] RDMA/siw: Fix DEFINE_PER_CPU compilation when
> ARCH_NEEDS_WEAK_PER_CPU
>
>The initializer for the variable cannot be inside the macro (and zero
>initialization isn't needed anyhow).
>
>include/linux/percpu-defs.h:92:33: warning: '__pcpu_unique_use_cnt'
>initialized and declared 'extern'
>  extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;  \
>                                 ^~~~~~~~~~~~~~
>include/linux/percpu-defs.h:115:2: note: in expansion of macro
>'DEFINE_PER_CPU_SECTION'
>  DEFINE_PER_CPU_SECTION(type, name, "")
>  ^~~~~~~~~~~~~~~~~~~~~~
>drivers/infiniband/sw/siw/siw_main.c:129:8: note: in expansion of
>macro 'DEFINE_PER_CPU'
> static DEFINE_PER_CPU(atomic_t, use_cnt = ATOMIC_INIT(0));
>        ^~~~~~~~~~~~~~
>
>Also the rules for PER_CPU require the variable names to be globally
>unique, so prefix them with siw_
>
>Fixes: b9be6f18cf9e ("rdma/siw: transmit path")
>Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
>Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
>Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
>---
> drivers/infiniband/sw/siw/siw_main.c  |  8 ++++----
> drivers/infiniband/sw/siw/siw_qp_tx.c | 10 +++++-----
> 2 files changed, 9 insertions(+), 9 deletions(-)
>
>diff --git a/drivers/infiniband/sw/siw/siw_main.c
>b/drivers/infiniband/sw/siw/siw_main.c
>index 3f5f3d27ebe5a1..fd2552a9091dee 100644
>--- a/drivers/infiniband/sw/siw/siw_main.c
>+++ b/drivers/infiniband/sw/siw/siw_main.c
>@@ -126,7 +126,7 @@ static int siw_dev_qualified(struct net_device
>*netdev)
> 	return 0;
> }
> 
>-static DEFINE_PER_CPU(atomic_t, use_cnt = ATOMIC_INIT(0));
>+static DEFINE_PER_CPU(atomic_t, siw_use_cnt);
> 
> static struct {
> 	struct cpumask **tx_valid_cpus;
>@@ -215,7 +215,7 @@ int siw_get_tx_cpu(struct siw_device *sdev)
> 		if (!siw_tx_thread[cpu])
> 			continue;
> 
>-		usage = atomic_read(&per_cpu(use_cnt, cpu));
>+		usage = atomic_read(&per_cpu(siw_use_cnt, cpu));
> 		if (usage <= min_use) {
> 			tx_cpu = cpu;
> 			min_use = usage;
>@@ -226,7 +226,7 @@ int siw_get_tx_cpu(struct siw_device *sdev)
> 
> out:
> 	if (tx_cpu >= 0)
>-		atomic_inc(&per_cpu(use_cnt, tx_cpu));
>+		atomic_inc(&per_cpu(siw_use_cnt, tx_cpu));
> 	else
> 		pr_warn("siw: no tx cpu found\n");
> 
>@@ -235,7 +235,7 @@ int siw_get_tx_cpu(struct siw_device *sdev)
> 
> void siw_put_tx_cpu(int cpu)
> {
>-	atomic_dec(&per_cpu(use_cnt, cpu));
>+	atomic_dec(&per_cpu(siw_use_cnt, cpu));
> }
> 
> static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int
>id)
>diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c
>b/drivers/infiniband/sw/siw/siw_qp_tx.c
>index 5e926fac51db30..1c9fa8fa96e513 100644
>--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
>+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
>@@ -1183,12 +1183,12 @@ struct tx_task_t {
> 	wait_queue_head_t waiting;
> };
> 
>-static DEFINE_PER_CPU(struct tx_task_t, tx_task_g);
>+static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g);
> 
> void siw_stop_tx_thread(int nr_cpu)
> {
> 	kthread_stop(siw_tx_thread[nr_cpu]);
>-	wake_up(&per_cpu(tx_task_g, nr_cpu).waiting);
>+	wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting);
> }
> 
> int siw_run_sq(void *data)
>@@ -1196,7 +1196,7 @@ int siw_run_sq(void *data)
> 	const int nr_cpu = (unsigned int)(long)data;
> 	struct llist_node *active;
> 	struct siw_qp *qp;
>-	struct tx_task_t *tx_task = &per_cpu(tx_task_g, nr_cpu);
>+	struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu);
> 
> 	init_llist_head(&tx_task->active);
> 	init_waitqueue_head(&tx_task->waiting);
>@@ -1261,9 +1261,9 @@ int siw_sq_start(struct siw_qp *qp)
> 	}
> 	siw_qp_get(qp);
> 
>-	llist_add(&qp->tx_list, &per_cpu(tx_task_g, qp->tx_cpu).active);
>+	llist_add(&qp->tx_list, &per_cpu(siw_tx_task_g,
>qp->tx_cpu).active);
> 
>-	wake_up(&per_cpu(tx_task_g, qp->tx_cpu).waiting);
>+	wake_up(&per_cpu(siw_tx_task_g, qp->tx_cpu).waiting);
> 
> 	return 0;
> }
>-- 
>2.21.0
>
>

Many thanks Jason!

Very much appreciated!

Bernard.


^ permalink raw reply

* Re: Re: linux-next: build failure after merge of the rdma tree
From: Jason Gunthorpe @ 2019-07-08 14:56 UTC (permalink / raw)
  To: Bernard Metzler
  Cc: Stephen Rothwell, Doug Ledford, Linux Next Mailing List,
	Linux Kernel Mailing List, linux-rdma@vger.kernel.org
In-Reply-To: <OF8B5D0A35.3AB4C2D3-ON00258431.004F7CF8-00258431.004F7D00@notes.na.collabserv.com>

On Mon, Jul 08, 2019 at 02:28:13PM +0000, Bernard Metzler wrote:

> Thanks for bringing this up. Indeed, that explicit
> initialization seems to be inappropriate. Can you please
> fix that as you suggest?

I'm applying this to fix the PER_CPU stuff in siw:

From 4c7d6dcd364843e408a60952ba914bb72bafc6cc Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 8 Jul 2019 11:36:32 -0300
Subject: [PATCH] RDMA/siw: Fix DEFINE_PER_CPU compilation when
 ARCH_NEEDS_WEAK_PER_CPU

The initializer for the variable cannot be inside the macro (and zero
initialization isn't needed anyhow).

include/linux/percpu-defs.h:92:33: warning: '__pcpu_unique_use_cnt' initialized and declared 'extern'
  extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;  \
                                 ^~~~~~~~~~~~~~
include/linux/percpu-defs.h:115:2: note: in expansion of macro 'DEFINE_PER_CPU_SECTION'
  DEFINE_PER_CPU_SECTION(type, name, "")
  ^~~~~~~~~~~~~~~~~~~~~~
drivers/infiniband/sw/siw/siw_main.c:129:8: note: in expansion of macro 'DEFINE_PER_CPU'
 static DEFINE_PER_CPU(atomic_t, use_cnt = ATOMIC_INIT(0));
        ^~~~~~~~~~~~~~

Also the rules for PER_CPU require the variable names to be globally
unique, so prefix them with siw_

Fixes: b9be6f18cf9e ("rdma/siw: transmit path")
Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/sw/siw/siw_main.c  |  8 ++++----
 drivers/infiniband/sw/siw/siw_qp_tx.c | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 3f5f3d27ebe5a1..fd2552a9091dee 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -126,7 +126,7 @@ static int siw_dev_qualified(struct net_device *netdev)
 	return 0;
 }
 
-static DEFINE_PER_CPU(atomic_t, use_cnt = ATOMIC_INIT(0));
+static DEFINE_PER_CPU(atomic_t, siw_use_cnt);
 
 static struct {
 	struct cpumask **tx_valid_cpus;
@@ -215,7 +215,7 @@ int siw_get_tx_cpu(struct siw_device *sdev)
 		if (!siw_tx_thread[cpu])
 			continue;
 
-		usage = atomic_read(&per_cpu(use_cnt, cpu));
+		usage = atomic_read(&per_cpu(siw_use_cnt, cpu));
 		if (usage <= min_use) {
 			tx_cpu = cpu;
 			min_use = usage;
@@ -226,7 +226,7 @@ int siw_get_tx_cpu(struct siw_device *sdev)
 
 out:
 	if (tx_cpu >= 0)
-		atomic_inc(&per_cpu(use_cnt, tx_cpu));
+		atomic_inc(&per_cpu(siw_use_cnt, tx_cpu));
 	else
 		pr_warn("siw: no tx cpu found\n");
 
@@ -235,7 +235,7 @@ int siw_get_tx_cpu(struct siw_device *sdev)
 
 void siw_put_tx_cpu(int cpu)
 {
-	atomic_dec(&per_cpu(use_cnt, cpu));
+	atomic_dec(&per_cpu(siw_use_cnt, cpu));
 }
 
 static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id)
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 5e926fac51db30..1c9fa8fa96e513 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1183,12 +1183,12 @@ struct tx_task_t {
 	wait_queue_head_t waiting;
 };
 
-static DEFINE_PER_CPU(struct tx_task_t, tx_task_g);
+static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g);
 
 void siw_stop_tx_thread(int nr_cpu)
 {
 	kthread_stop(siw_tx_thread[nr_cpu]);
-	wake_up(&per_cpu(tx_task_g, nr_cpu).waiting);
+	wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting);
 }
 
 int siw_run_sq(void *data)
@@ -1196,7 +1196,7 @@ int siw_run_sq(void *data)
 	const int nr_cpu = (unsigned int)(long)data;
 	struct llist_node *active;
 	struct siw_qp *qp;
-	struct tx_task_t *tx_task = &per_cpu(tx_task_g, nr_cpu);
+	struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu);
 
 	init_llist_head(&tx_task->active);
 	init_waitqueue_head(&tx_task->waiting);
@@ -1261,9 +1261,9 @@ int siw_sq_start(struct siw_qp *qp)
 	}
 	siw_qp_get(qp);
 
-	llist_add(&qp->tx_list, &per_cpu(tx_task_g, qp->tx_cpu).active);
+	llist_add(&qp->tx_list, &per_cpu(siw_tx_task_g, qp->tx_cpu).active);
 
-	wake_up(&per_cpu(tx_task_g, qp->tx_cpu).waiting);
+	wake_up(&per_cpu(siw_tx_task_g, qp->tx_cpu).waiting);
 
 	return 0;
 }
-- 
2.21.0


^ permalink raw reply related

* Re: [rdma 14/16] RDMA/irdma: Add ABI definitions
From: Jason Gunthorpe @ 2019-07-08 14:13 UTC (permalink / raw)
  To: Saleem, Shiraz
  Cc: Leon Romanovsky, Kirsher, Jeffrey T, dledford@redhat.com,
	davem@davemloft.net, Ismail, Mustafa, linux-rdma@vger.kernel.org,
	netdev@vger.kernel.org, nhorman@redhat.com, sassmann@redhat.com,
	poswald@suse.com, Ertman, David M
In-Reply-To: <9DD61F30A802C4429A01CA4200E302A7A68512AA@fmsmsx124.amr.corp.intel.com>

On Sat, Jul 06, 2019 at 04:15:20PM +0000, Saleem, Shiraz wrote:
> > Subject: Re: [rdma 14/16] RDMA/irdma: Add ABI definitions
> > 
> > On Fri, Jul 05, 2019 at 04:42:19PM +0000, Saleem, Shiraz wrote:
> > > > Subject: Re: [rdma 14/16] RDMA/irdma: Add ABI definitions
> > > >
> > > > On Thu, Jul 04, 2019 at 10:40:21AM +0300, Leon Romanovsky wrote:
> > > > > On Wed, Jul 03, 2019 at 07:12:57PM -0700, Jeff Kirsher wrote:
> > > > > > From: Mustafa Ismail <mustafa.ismail@intel.com>
> > > > > >
> > > > > > Add ABI definitions for irdma.
> > > > > >
> > > > > > Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
> > > > > > Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
> > > > > > include/uapi/rdma/irdma-abi.h | 130
> > > > > > ++++++++++++++++++++++++++++++++++
> > > > > >  1 file changed, 130 insertions(+)  create mode 100644
> > > > > > include/uapi/rdma/irdma-abi.h
> > > > > >
> > > > > > diff --git a/include/uapi/rdma/irdma-abi.h
> > > > > > b/include/uapi/rdma/irdma-abi.h new file mode 100644 index
> > > > > > 000000000000..bdfbda4c829e
> > > > > > +++ b/include/uapi/rdma/irdma-abi.h
> > > > > > @@ -0,0 +1,130 @@
> > > > > > +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
> > > > > > +/* Copyright (c) 2006 - 2019 Intel Corporation.  All rights reserved.
> > > > > > + * Copyright (c) 2005 Topspin Communications.  All rights reserved.
> > > > > > + * Copyright (c) 2005 Cisco Systems.  All rights reserved.
> > > > > > + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
> > > > > > + */
> > > > > > +
> > > > > > +#ifndef IRDMA_ABI_H
> > > > > > +#define IRDMA_ABI_H
> > > > > > +
> > > > > > +#include <linux/types.h>
> > > > > > +
> > > > > > +/* irdma must support legacy GEN_1 i40iw kernel
> > > > > > + * and user-space whose last ABI ver is 5  */ #define
> > > > > > +IRDMA_ABI_VER
> > > > > > +6
> > > > >
> > > > > Can you please elaborate about it more?
> > > > > There is no irdma code in RDMA yet, so it makes me wonder why new
> > > > > define shouldn't start from 1.
> > > >
> > > > It is because they are ABI compatible with the current user space,
> > > > which raises the question why we even have this confusing header file..
> > >
> > > It is because we need to support current providers/i40iw user-space.
> > > Our user-space patch series will introduce a new provider (irdma)
> > > whose ABI ver. is also 6 (capable of supporting X722 and which will
> > > work with i40iw driver on older kernels) and removes providers/i40iw from rdma-
> > core.
> > 
> > Why on earth would we do that?
> > 
> A unified library providers/irdma to go in hand with the driver irdma and uses the ABI header.
> It can support the new network device e810 and existing x722 iWARP device. It obsoletes
> providers/i40iw and extends its ABI. So why keep providers/i40iw around in rdma-core?

Why rewrite a perfectly good userspace that is compatible with the
future and past kernels?

Is there something so wrong with the userspace provider to need this?

Jason

^ permalink raw reply

* Re: [PATCH v5 rdma-next 1/6] RDMA/core: Create mmap database and cookie helper functions
From: Gal Pressman @ 2019-07-08 13:47 UTC (permalink / raw)
  To: Michal Kalderon, ariel.elior, jgg, dledford; +Cc: linux-rdma, davem, netdev
In-Reply-To: <20190708091503.14723-2-michal.kalderon@marvell.com>

On 08/07/2019 12:14, Michal Kalderon wrote:
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 8a6ccb936dfe..a830c2c5d691 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -2521,6 +2521,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
>  	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
>  	SET_DEVICE_OP(dev_ops, map_phys_fmr);
>  	SET_DEVICE_OP(dev_ops, mmap);
> +	SET_DEVICE_OP(dev_ops, mmap_free);
>  	SET_DEVICE_OP(dev_ops, modify_ah);
>  	SET_DEVICE_OP(dev_ops, modify_cq);
>  	SET_DEVICE_OP(dev_ops, modify_device);
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index ccf4d069c25c..7166741834c8 100644
> --- a/drivers/infiniband/core/rdma_core.c
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -817,6 +817,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
>  	rdma_restrack_del(&ucontext->res);
>  
>  	ib_dev->ops.dealloc_ucontext(ucontext);
> +	rdma_user_mmap_entries_remove_free(ucontext);

This should happen before dealloc_ucontext.

> +struct rdma_user_mmap_entry *
> +rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, u64 key, u64 len)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	u64 mmap_page;
> +
> +	mmap_page = key >> PAGE_SHIFT;
> +	if (mmap_page > U32_MAX)
> +		return NULL;
> +
> +	entry = xa_load(&ucontext->mmap_xa, mmap_page);
> +	if (!entry || rdma_user_mmap_get_key(entry) != key ||

I wonder if the 'rdma_user_mmap_get_key(entry) != key' check is still needed.

> +/*
> + * This is only called when the ucontext is destroyed and there can be no
> + * concurrent query via mmap or allocate on the xarray, thus we can be sure no
> + * other thread is using the entry pointer. We also know that all the BAR
> + * pages have either been zap'd or munmaped at this point.  Normal pages are
> + * refcounted and will be freed at the proper time.
> + */
> +void rdma_user_mmap_entries_remove_free(struct ib_ucontext *ucontext)
> +{
> +	struct rdma_user_mmap_entry *entry;
> +	unsigned long mmap_page;
> +
> +	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
> +		xa_erase(&ucontext->mmap_xa, mmap_page);
> +
> +		ibdev_dbg(ucontext->device,
> +			  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
> +			  entry->obj, rdma_user_mmap_get_key(entry),
> +			  entry->address, entry->length);
> +		if (ucontext->device->ops.mmap_free)
> +			ucontext->device->ops.mmap_free(entry->address,
> +							entry->length,
> +							entry->mmap_flag);

Pass entry instead?

> +		kfree(entry);
> +	}
> +}
> +
>  void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>  {
>  	struct rdma_umap_priv *priv, *next_priv;
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 26e9c2594913..54ce3fdae180 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -1425,6 +1425,8 @@ struct ib_ucontext {
>  	 * Implementation details of the RDMA core, don't use in drivers:
>  	 */
>  	struct rdma_restrack_entry res;
> +	struct xarray mmap_xa;
> +	u32 mmap_xa_page;
>  };
>  
>  struct ib_uobject {
> @@ -2311,6 +2313,7 @@ struct ib_device_ops {
>  			      struct ib_udata *udata);
>  	void (*dealloc_ucontext)(struct ib_ucontext *context);
>  	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
> +	void (*mmap_free)(u64 address, u64 length, u8 mmap_flag);

I feel like this callback needs some documentation.

>  	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
>  	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
>  	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
> @@ -2706,9 +2709,23 @@ void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
>  void ib_set_device_ops(struct ib_device *device,
>  		       const struct ib_device_ops *ops);
>  
> +#define RDMA_USER_MMAP_INVALID U64_MAX
> +struct rdma_user_mmap_entry {
> +	void  *obj;

I know EFA is the culprit here, but please remove the extra space :).

> +	u64 address;
> +	u64 length;
> +	u32 mmap_page;
> +	u8 mmap_flag;
> +};
> +

^ permalink raw reply

* Re: [PATCH for-next 5/8] RDMA/hns: Bugfix for calculating qp buffer size
From: oulijun @ 2019-07-08 13:46 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: dledford, leon, linux-rdma, linuxarm
In-Reply-To: <20190707122117.GB19566@ziepe.ca>

在 2019/7/7 20:21, Jason Gunthorpe 写道:
> On Sat, Jul 06, 2019 at 09:47:09AM +0800, oulijun wrote:
>> 在 2019/6/24 19:47, Lijun Ou 写道:
>>> From: o00290482 <o00290482@huawei.com>
>> Hi, Jason
>>    Maybe my local configuration error is causing the wrong author.  How should I make changes?
>>
>> The correct one is as follows:
>> From: Lijun Ou <oulijun@huawei.com>
> I fixed it this once, but please check and fix it on your end in
> future.
>
> You should be able to set the patch author in git's config file:
>
> [user]
>         email = jgg@mellanox.com
>         name = Jason Gunthorpe
>
> Jason
>
> .
Thanks.




^ permalink raw reply

* [PATCH for-next 9/9] RDMA/hns: Refactor eq table init for hip08
From: Lijun Ou @ 2019-07-08 13:41 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm
In-Reply-To: <1562593285-8037-1-git-send-email-oulijun@huawei.com>

From: Yixian Liu <liuyixian@huawei.com>

To make the code more readable, move the part that names
and requests the irqs out of the eq table init into a
separate function.

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 176 ++++++++++++++++++-----------
 1 file changed, 107 insertions(+), 69 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c2ddfc1..83c58be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5735,6 +5735,94 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
 	return ret;
 }
 
+static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
+				  int comp_num, int aeq_num, int other_num)
+{
+	struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+	int i, j;
+	int ret;
+
+	for (i = 0; i < irq_num; i++) {
+		hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
+					       GFP_KERNEL);
+		if (!hr_dev->irq_names[i]) {
+			ret = -ENOMEM;
+			goto err_kzalloc_failed;
+		}
+	}
+
+	/* irq contains: abnormal + AEQ + CEQ*/
+	for (j = 0; j < irq_num; j++)
+		if (j < other_num)
+			snprintf((char *)hr_dev->irq_names[j],
+				 HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", j);
+		else if (j < (other_num + aeq_num))
+			snprintf((char *)hr_dev->irq_names[j],
+				 HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
+				 j - other_num);
+		else
+			snprintf((char *)hr_dev->irq_names[j],
+				 HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
+				 j - other_num - aeq_num);
+
+	for (j = 0; j < irq_num; j++) {
+		if (j < other_num)
+			ret = request_irq(hr_dev->irq[j],
+					  hns_roce_v2_msix_interrupt_abn,
+					  0, hr_dev->irq_names[j], hr_dev);
+
+		else if (j < (other_num + comp_num))
+			ret = request_irq(eq_table->eq[j - other_num].irq,
+					  hns_roce_v2_msix_interrupt_eq,
+					  0, hr_dev->irq_names[j + aeq_num],
+					  &eq_table->eq[j - other_num]);
+		else
+			ret = request_irq(eq_table->eq[j - other_num].irq,
+					  hns_roce_v2_msix_interrupt_eq,
+					  0, hr_dev->irq_names[j - comp_num],
+					  &eq_table->eq[j - other_num]);
+		if (ret) {
+			dev_err(hr_dev->dev, "Request irq error!\n");
+			goto err_request_failed;
+		}
+	}
+
+	return 0;
+
+err_request_failed:
+	for (j -= 1; j >= 0; j--)
+		if (j < other_num)
+			free_irq(hr_dev->irq[j], hr_dev);
+		else
+			free_irq(eq_table->eq[j - other_num].irq,
+				 &eq_table->eq[j - other_num]);
+
+err_kzalloc_failed:
+	for (i -= 1; i >= 0; i--)
+		kfree(hr_dev->irq_names[i]);
+
+	return ret;
+}
+
+static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
+{
+	int irq_num;
+	int eq_num;
+	int i;
+
+	eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+	irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+	for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
+		free_irq(hr_dev->irq[i], hr_dev);
+
+	for (i = 0; i < eq_num; i++)
+		free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+
+	for (i = 0; i < irq_num; i++)
+		kfree(hr_dev->irq_names[i]);
+}
+
 static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
@@ -5746,7 +5834,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 	int other_num;
 	int comp_num;
 	int aeq_num;
-	int i, j, k;
+	int i;
 	int ret;
 
 	other_num = hr_dev->caps.num_other_vectors;
@@ -5760,27 +5848,18 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 	if (!eq_table->eq)
 		return -ENOMEM;
 
-	for (i = 0; i < irq_num; i++) {
-		hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
-					       GFP_KERNEL);
-		if (!hr_dev->irq_names[i]) {
-			ret = -ENOMEM;
-			goto err_failed_kzalloc;
-		}
-	}
-
 	/* create eq */
-	for (j = 0; j < eq_num; j++) {
-		eq = &eq_table->eq[j];
+	for (i = 0; i < eq_num; i++) {
+		eq = &eq_table->eq[i];
 		eq->hr_dev = hr_dev;
-		eq->eqn = j;
-		if (j < comp_num) {
+		eq->eqn = i;
+		if (i < comp_num) {
 			/* CEQ */
 			eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
 			eq->type_flag = HNS_ROCE_CEQ;
 			eq->entries = hr_dev->caps.ceqe_depth;
 			eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
-			eq->irq = hr_dev->irq[j + other_num + aeq_num];
+			eq->irq = hr_dev->irq[i + other_num + aeq_num];
 			eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
 			eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
 		} else {
@@ -5789,7 +5868,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 			eq->type_flag = HNS_ROCE_AEQ;
 			eq->entries = hr_dev->caps.aeqe_depth;
 			eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
-			eq->irq = hr_dev->irq[j - comp_num + other_num];
+			eq->irq = hr_dev->irq[i - comp_num + other_num];
 			eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
 			eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
 		}
@@ -5804,40 +5883,11 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 	/* enable irq */
 	hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
 
-	/* irq contains: abnormal + AEQ + CEQ*/
-	for (k = 0; k < irq_num; k++)
-		if (k < other_num)
-			snprintf((char *)hr_dev->irq_names[k],
-				 HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k);
-		else if (k < (other_num + aeq_num))
-			snprintf((char *)hr_dev->irq_names[k],
-				 HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
-				 k - other_num);
-		else
-			snprintf((char *)hr_dev->irq_names[k],
-				 HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
-				 k - other_num - aeq_num);
-
-	for (k = 0; k < irq_num; k++) {
-		if (k < other_num)
-			ret = request_irq(hr_dev->irq[k],
-					  hns_roce_v2_msix_interrupt_abn,
-					  0, hr_dev->irq_names[k], hr_dev);
-
-		else if (k < (other_num + comp_num))
-			ret = request_irq(eq_table->eq[k - other_num].irq,
-					  hns_roce_v2_msix_interrupt_eq,
-					  0, hr_dev->irq_names[k + aeq_num],
-					  &eq_table->eq[k - other_num]);
-		else
-			ret = request_irq(eq_table->eq[k - other_num].irq,
-					  hns_roce_v2_msix_interrupt_eq,
-					  0, hr_dev->irq_names[k - comp_num],
-					  &eq_table->eq[k - other_num]);
-		if (ret) {
-			dev_err(dev, "Request irq error!\n");
-			goto err_request_irq_fail;
-		}
+	ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num,
+				     aeq_num, other_num);
+	if (ret) {
+		dev_err(dev, "Request irq failed.\n");
+		goto err_request_irq_fail;
 	}
 
 	hr_dev->irq_workq =
@@ -5845,26 +5895,20 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 	if (!hr_dev->irq_workq) {
 		dev_err(dev, "Create irq workqueue failed!\n");
 		ret = -ENOMEM;
-		goto err_request_irq_fail;
+		goto err_create_wq_fail;
 	}
 
 	return 0;
 
+err_create_wq_fail:
+	__hns_roce_free_irq(hr_dev);
+
 err_request_irq_fail:
-	for (k -= 1; k >= 0; k--)
-		if (k < other_num)
-			free_irq(hr_dev->irq[k], hr_dev);
-		else
-			free_irq(eq_table->eq[k - other_num].irq,
-				 &eq_table->eq[k - other_num]);
+	hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
 
 err_create_eq_fail:
-	for (j -= 1; j >= 0; j--)
-		hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]);
-
-err_failed_kzalloc:
 	for (i -= 1; i >= 0; i--)
-		kfree(hr_dev->irq_names[i]);
+		hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
 	kfree(eq_table->eq);
 
 	return ret;
@@ -5883,20 +5927,14 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
 	/* Disable irq */
 	hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
 
-	for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
-		free_irq(hr_dev->irq[i], hr_dev);
+	__hns_roce_free_irq(hr_dev);
 
 	for (i = 0; i < eq_num; i++) {
 		hns_roce_v2_destroy_eqc(hr_dev, i);
 
-		free_irq(eq_table->eq[i].irq, &eq_table->eq[i]);
-
 		hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
 	}
 
-	for (i = 0; i < irq_num; i++)
-		kfree(hr_dev->irq_names[i]);
-
 	kfree(eq_table->eq);
 
 	flush_workqueue(hr_dev->irq_workq);
-- 
1.9.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox