* [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ.
@ 2014-11-04 9:23 Zheng Li
[not found] ` <1415093024-13041-1-git-send-email-zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Zheng Li @ 2014-11-04 9:23 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
ogerlitz-VPRAkNaXOzVWk0Htik3J/w, yishaih-VPRAkNaXOzVWk0Htik3J/w,
jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb,
eli-VPRAkNaXOzVWk0Htik3J/w
Cc: joe.jin-QHcLZuEGTsvQT0dZR+AlfA,
john.sobecki-QHcLZuEGTsvQT0dZR+AlfA,
guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA,
zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA
The mlx4_ib_cq_comp dispatching was racy and this is fixed by using
rcu locking. However, that introduced regression in ib port failure
tests, so changing to rwlock.
The rcu_XXX APIs are used to synchronize threads and rely on preempt_disable/enable
to make sure that no thread crosses another thread, but they are not safe for interrupt
routines. In this case we are in interrupt context: mlx4_msi_x_interrupt ->
mlx4_eq_int -> mlx4_cq_completion, so we can't use rcu_XXX to protect this code.
The stack is below:
crash> bt
PID: 32068 TASK: ffff880ed14405c0 CPU: 20 COMMAND: "kworker/u:1"
#0 [ffff880ec7c01a00] __schedule at ffffffff81506664
#1 [ffff880ec7c01b18] schedule at ffffffff81506d45
#2 [ffff880ec7c01b28] schedule_timeout at ffffffff8150719c
#3 [ffff880ec7c01bd8] wait_for_common at ffffffff81506bcd
#4 [ffff880ec7c01c68] wait_for_completion at ffffffff81506cfd
#5 [ffff880ec7c01c78] synchronize_sched at ffffffff810dda48
#6 [ffff880ec7c01cc8] mlx4_cq_free at ffffffffa027e67e [mlx4_core]
#7 [ffff880ec7c01cf8] mlx4_ib_destroy_cq at ffffffffa03238a3 [mlx4_ib]
#8 [ffff880ec7c01d18] ib_destroy_cq at ffffffffa0301c9e [ib_core]
#9 [ffff880ec7c01d28] rds_ib_conn_shutdown at ffffffffa041cf5d [rds_rdma]
#10 [ffff880ec7c01dd8] rds_conn_shutdown at ffffffffa02b7f18 [rds]
#11 [ffff880ec7c01e38] rds_shutdown_worker at ffffffffa02bd14a [rds]
#12 [ffff880ec7c01e58] process_one_work at ffffffff8108c3b9
#13 [ffff880ec7c01ea8] worker_thread at ffffffff8108ccfa
#14 [ffff880ec7c01ee8] kthread at ffffffff810912d7
#15 [ffff880ec7c01f48] kernel_thread_helper at ffffffff815123c4
Signed-off-by: Zheng Li <zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Joe Jin <joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Guru <guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Signed-off-by: John Sobecki <john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
drivers/net/ethernet/mellanox/mlx4/cq.c | 26 +++++++++++++++-----------
drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +-
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 56022d6..8b3b849 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -54,15 +54,19 @@
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{
+ struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq;
- cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
- cqn & (dev->caps.num_cqs - 1));
+ read_lock(&cq_table->lock);
+
+ cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
if (!cq) {
mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
return;
}
+ read_unlock(&cq_table->lock);
+
++cq->arm_sn;
cq->comp(cq);
@@ -73,13 +77,13 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq;
- spin_lock(&cq_table->lock);
+ read_lock(&cq_table->lock);
cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
if (cq)
atomic_inc(&cq->refcount);
- spin_unlock(&cq_table->lock);
+ read_unlock(&cq_table->lock);
if (!cq) {
mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
@@ -256,9 +260,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
if (err)
return err;
- spin_lock_irq(&cq_table->lock);
+ write_lock_irq(&cq_table->lock);
err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
- spin_unlock_irq(&cq_table->lock);
+ write_unlock_irq(&cq_table->lock);
if (err)
goto err_icm;
@@ -297,9 +301,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
return 0;
err_radix:
- spin_lock_irq(&cq_table->lock);
+ write_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn);
- spin_unlock_irq(&cq_table->lock);
+ write_unlock_irq(&cq_table->lock);
err_icm:
mlx4_cq_free_icm(dev, cq->cqn);
@@ -320,9 +324,9 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
synchronize_irq(priv->eq_table.eq[cq->vector].irq);
- spin_lock_irq(&cq_table->lock);
+ write_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn);
- spin_unlock_irq(&cq_table->lock);
+ write_unlock_irq(&cq_table->lock);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
@@ -337,7 +341,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
int err;
- spin_lock_init(&cq_table->lock);
+ rwlock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
if (mlx4_is_slave(dev))
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index de10dbb..42e2348 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -642,7 +642,7 @@ struct mlx4_mr_table {
struct mlx4_cq_table {
struct mlx4_bitmap bitmap;
- spinlock_t lock;
+ rwlock_t lock;
struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
--
1.7.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ.
[not found] ` <1415093024-13041-1-git-send-email-zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2014-11-04 10:51 ` Sagi Grimberg
[not found] ` <5458AFCA.8020309-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Sagi Grimberg @ 2014-11-04 10:51 UTC (permalink / raw)
To: Zheng Li, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
ogerlitz-VPRAkNaXOzVWk0Htik3J/w, yishaih-VPRAkNaXOzVWk0Htik3J/w,
jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb,
eli-VPRAkNaXOzVWk0Htik3J/w
Cc: joe.jin-QHcLZuEGTsvQT0dZR+AlfA,
john.sobecki-QHcLZuEGTsvQT0dZR+AlfA,
guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA
On 11/4/2014 11:23 AM, Zheng Li wrote:
> The mlx4_ib_cq_comp dispatching was racy and this is fixed by using
> rcu locking. However, that introduced regression in ib port failure
> tests, so changing to rwlock.
> rcu_XXX api are used to synchronize threads and use preempt_disable/enable
> to make sure that no thread cross other thread but is not safe for interrupts
> routines. In this case we are in interrupt context: mlx4_msi_x_interrupt ->
> mlx4_eq_int -> mlx4_cq_completion, so can't use rcu_XXX to protect this code.
Did you check the performance (latency) implications of grabbing a
read_lock in the hot path?
If I understand correctly, mlx4_cq_free is racing against
mlx4_cq_completion. What about dis-arming the cq (assuming that it is
possible) after calling synchronize_irq()?
I'm just not very happy with acquiring a lock (even a read_lock) in
such a critical path.
>
> The stack is below:
> crash> bt
> PID: 32068 TASK: ffff880ed14405c0 CPU: 20 COMMAND: "kworker/u:1"
> #0 [ffff880ec7c01a00] __schedule at ffffffff81506664
> #1 [ffff880ec7c01b18] schedule at ffffffff81506d45
> #2 [ffff880ec7c01b28] schedule_timeout at ffffffff8150719c
> #3 [ffff880ec7c01bd8] wait_for_common at ffffffff81506bcd
> #4 [ffff880ec7c01c68] wait_for_completion at ffffffff81506cfd
> #5 [ffff880ec7c01c78] synchronize_sched at ffffffff810dda48
> #6 [ffff880ec7c01cc8] mlx4_cq_free at ffffffffa027e67e [mlx4_core]
> #7 [ffff880ec7c01cf8] mlx4_ib_destroy_cq at ffffffffa03238a3 [mlx4_ib]
> #8 [ffff880ec7c01d18] ib_destroy_cq at ffffffffa0301c9e [ib_core]
> #9 [ffff880ec7c01d28] rds_ib_conn_shutdown at ffffffffa041cf5d [rds_rdma]
> #10 [ffff880ec7c01dd8] rds_conn_shutdown at ffffffffa02b7f18 [rds]
> #11 [ffff880ec7c01e38] rds_shutdown_worker at ffffffffa02bd14a [rds]
> #12 [ffff880ec7c01e58] process_one_work at ffffffff8108c3b9
> #13 [ffff880ec7c01ea8] worker_thread at ffffffff8108ccfa
> #14 [ffff880ec7c01ee8] kthread at ffffffff810912d7
> #15 [ffff880ec7c01f48] kernel_thread_helper at ffffffff815123c4
>
> Signed-off-by: Zheng Li <zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Joe Jin <joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Guru <guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: John Sobecki <john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> ---
> drivers/net/ethernet/mellanox/mlx4/cq.c | 26 +++++++++++++++-----------
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +-
> 2 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
> index 56022d6..8b3b849 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/cq.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
> @@ -54,15 +54,19 @@
>
> void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
> {
> + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
> struct mlx4_cq *cq;
>
> - cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
> - cqn & (dev->caps.num_cqs - 1));
> + read_lock(&cq_table->lock);
> +
> + cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
> if (!cq) {
> mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
> return;
> }
>
> + read_unlock(&cq_table->lock);
> +
> ++cq->arm_sn;
>
> cq->comp(cq);
> @@ -73,13 +77,13 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
> struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
> struct mlx4_cq *cq;
>
> - spin_lock(&cq_table->lock);
> + read_lock(&cq_table->lock);
>
> cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
> if (cq)
> atomic_inc(&cq->refcount);
>
> - spin_unlock(&cq_table->lock);
> + read_unlock(&cq_table->lock);
>
> if (!cq) {
> mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
> @@ -256,9 +260,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
> if (err)
> return err;
>
> - spin_lock_irq(&cq_table->lock);
> + write_lock_irq(&cq_table->lock);
> err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
> - spin_unlock_irq(&cq_table->lock);
> + write_unlock_irq(&cq_table->lock);
> if (err)
> goto err_icm;
>
> @@ -297,9 +301,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
> return 0;
>
> err_radix:
> - spin_lock_irq(&cq_table->lock);
> + write_lock_irq(&cq_table->lock);
> radix_tree_delete(&cq_table->tree, cq->cqn);
> - spin_unlock_irq(&cq_table->lock);
> + write_unlock_irq(&cq_table->lock);
>
> err_icm:
> mlx4_cq_free_icm(dev, cq->cqn);
> @@ -320,9 +324,9 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
>
> synchronize_irq(priv->eq_table.eq[cq->vector].irq);
>
> - spin_lock_irq(&cq_table->lock);
> + write_lock_irq(&cq_table->lock);
> radix_tree_delete(&cq_table->tree, cq->cqn);
> - spin_unlock_irq(&cq_table->lock);
> + write_unlock_irq(&cq_table->lock);
>
> if (atomic_dec_and_test(&cq->refcount))
> complete(&cq->free);
> @@ -337,7 +341,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
> struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
> int err;
>
> - spin_lock_init(&cq_table->lock);
> + rwlock_init(&cq_table->lock);
> INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
> if (mlx4_is_slave(dev))
> return 0;
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> index de10dbb..42e2348 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> @@ -642,7 +642,7 @@ struct mlx4_mr_table {
>
> struct mlx4_cq_table {
> struct mlx4_bitmap bitmap;
> - spinlock_t lock;
> + rwlock_t lock;
> struct radix_tree_root tree;
> struct mlx4_icm_table table;
> struct mlx4_icm_table cmpt_table;
>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ.
[not found] ` <5458AFCA.8020309-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2014-11-04 11:32 ` Shachar Raindel
[not found] ` <0d09d3ae52134cf4bfe4e8554eb34185-LOZWmgKjnYgQouBfZGh8ttqRiQSDpxhJvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2014-11-04 19:15 ` John Sobecki
1 sibling, 1 reply; 5+ messages in thread
From: Shachar Raindel @ 2014-11-04 11:32 UTC (permalink / raw)
To: Sagi Grimberg, Zheng Li,
linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Or Gerlitz,
Yishai Hadas,
jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org,
Eli Cohen
Cc: joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org
> -----Original Message-----
> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Sagi Grimberg
> Sent: Tuesday, November 04, 2014 12:52 PM
> To: Zheng Li; linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; Or Gerlitz; Yishai Hadas;
> jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org; Eli Cohen
> Cc: joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org; john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org;
> guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org
> Subject: Re: [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ.
>
> On 11/4/2014 11:23 AM, Zheng Li wrote:
> > The mlx4_ib_cq_comp dispatching was racy and this is fixed by using
> > rcu locking. However, that introduced regression in ib port failure
> > tests, so changing to rwlock.
> > rcu_XXX api are used to synchronize threads and use
> preempt_disable/enable
> > to make sure that no thread cross other thread but is not safe for
> interrupts
> routines. In this case we are in interrupt context:
> mlx4_msi_x_interrupt ->
> > mlx4_eq_int -> mlx4_cq_completion, so can't use rcu_XXX to protect
> this code.
>
> Did you check the performance (latency) implications of grabbing a
> read_lock in the hot path?
>
> If I understand correctly, mlx4_cq_free is racing against
> mlx4_cq_completion. What about dis-arming the cq (assuming that it is
> possible) after calling synchronize_irq()?
>
> I'm just not very happy with acquiring a lock (even a read_lock) in
> such a critical path.
>
There are 2 separate issues here:
a. The hot path in the linux kernel is currently missing a lock. The
following race can occur with the current code:
Thread 1 | Thread 2
-----------------------------+----------------------------------------
mlx4_cq_free(cq_1) | mlx4_cq_completion(cq_2)
| radix_tree_lookup(cq_2) (in progress)
radix_tree_delete(cq_1) | (traversal is still ongoing)
free some radix tree nodes|
| access free'd memory
For this issue, there are 3 possible fixes:
1. The patch discussed here, performing an explicit read locking on the
hot path and risking scalability issues due to cache line bouncing.
2. Similar, spin_lock based approach proposed 2 years ago:
http://marc.info/?t=134632316600001&r=1&w=2
3. A solution using RCU, included in Mellanox OFED.
b. The issue causing the stack trace below. Note that the stack
trace is not referring the upstream code, as there is no place in the
current mlx4_cq_free code that synchronizes the RCU. This indicates
that the bug report is referring to the Mellanox OFED version of this
code. However, the stack trace does not explain the regression
described above. Can you please provide:
- The exact details of the code base you are referring to in the report?
- A better description of the issue - you can use rcu_read_lock from
interrupt context, so something else is going on here.
> >
> > The stack is below:
> > crash> bt
> > PID: 32068 TASK: ffff880ed14405c0 CPU: 20 COMMAND: "kworker/u:1"
> > #0 [ffff880ec7c01a00] __schedule at ffffffff81506664
> > #1 [ffff880ec7c01b18] schedule at ffffffff81506d45
> > #2 [ffff880ec7c01b28] schedule_timeout at ffffffff8150719c
> > #3 [ffff880ec7c01bd8] wait_for_common at ffffffff81506bcd
> > #4 [ffff880ec7c01c68] wait_for_completion at ffffffff81506cfd
> > #5 [ffff880ec7c01c78] synchronize_sched at ffffffff810dda48
> > #6 [ffff880ec7c01cc8] mlx4_cq_free at ffffffffa027e67e [mlx4_core]
> > #7 [ffff880ec7c01cf8] mlx4_ib_destroy_cq at ffffffffa03238a3
> [mlx4_ib]
> > #8 [ffff880ec7c01d18] ib_destroy_cq at ffffffffa0301c9e [ib_core]
> > #9 [ffff880ec7c01d28] rds_ib_conn_shutdown at ffffffffa041cf5d
> [rds_rdma]
> > #10 [ffff880ec7c01dd8] rds_conn_shutdown at ffffffffa02b7f18 [rds]
> > #11 [ffff880ec7c01e38] rds_shutdown_worker at ffffffffa02bd14a [rds]
> > #12 [ffff880ec7c01e58] process_one_work at ffffffff8108c3b9
> > #13 [ffff880ec7c01ea8] worker_thread at ffffffff8108ccfa
> > #14 [ffff880ec7c01ee8] kthread at ffffffff810912d7
> > #15 [ffff880ec7c01f48] kernel_thread_helper at ffffffff815123c4
> >
> > Signed-off-by: Zheng Li <zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > Signed-off-by: Joe Jin <joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > Signed-off-by: Guru <guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > Signed-off-by: John Sobecki <john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > ---
> > drivers/net/ethernet/mellanox/mlx4/cq.c | 26 +++++++++++++++----
> -------
> > drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +-
> > 2 files changed, 16 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c
> b/drivers/net/ethernet/mellanox/mlx4/cq.c
> > index 56022d6..8b3b849 100644
> > --- a/drivers/net/ethernet/mellanox/mlx4/cq.c
> > +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
> > @@ -54,15 +54,19 @@
> >
> > void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
> > {
> > + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
> > struct mlx4_cq *cq;
> >
> > - cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
> > - cqn & (dev->caps.num_cqs - 1));
> > + read_lock(&cq_table->lock);
> > +
> > + cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs -
> 1));
> > if (!cq) {
> > mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
> > return;
> > }
> >
> > + read_unlock(&cq_table->lock);
> > +
> > ++cq->arm_sn;
> >
> > cq->comp(cq);
> > @@ -73,13 +77,13 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn,
> int event_type)
> > struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
> > struct mlx4_cq *cq;
> >
> > - spin_lock(&cq_table->lock);
> > + read_lock(&cq_table->lock);
> >
> > cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs -
> 1));
> > if (cq)
> > atomic_inc(&cq->refcount);
> >
> > - spin_unlock(&cq_table->lock);
> > + read_unlock(&cq_table->lock);
> >
> > if (!cq) {
> > mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
> > @@ -256,9 +260,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
> > if (err)
> > return err;
> >
> > - spin_lock_irq(&cq_table->lock);
> > + write_lock_irq(&cq_table->lock);
> > err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
> > - spin_unlock_irq(&cq_table->lock);
> > + write_unlock_irq(&cq_table->lock);
> > if (err)
> > goto err_icm;
> >
> > @@ -297,9 +301,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
> > return 0;
> >
> > err_radix:
> > - spin_lock_irq(&cq_table->lock);
> > + write_lock_irq(&cq_table->lock);
> > radix_tree_delete(&cq_table->tree, cq->cqn);
> > - spin_unlock_irq(&cq_table->lock);
> > + write_unlock_irq(&cq_table->lock);
> >
> > err_icm:
> > mlx4_cq_free_icm(dev, cq->cqn);
> > @@ -320,9 +324,9 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct
> mlx4_cq *cq)
> >
> > synchronize_irq(priv->eq_table.eq[cq->vector].irq);
> >
> > - spin_lock_irq(&cq_table->lock);
> > + write_lock_irq(&cq_table->lock);
> > radix_tree_delete(&cq_table->tree, cq->cqn);
> > - spin_unlock_irq(&cq_table->lock);
> > + write_unlock_irq(&cq_table->lock);
> >
> > if (atomic_dec_and_test(&cq->refcount))
> > complete(&cq->free);
> > @@ -337,7 +341,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
> > struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
> > int err;
> >
> > - spin_lock_init(&cq_table->lock);
> > + rwlock_init(&cq_table->lock);
> > INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
> > if (mlx4_is_slave(dev))
> > return 0;
> > diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> > index de10dbb..42e2348 100644
> > --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> > +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> > @@ -642,7 +642,7 @@ struct mlx4_mr_table {
> >
> > struct mlx4_cq_table {
> > struct mlx4_bitmap bitmap;
> > - spinlock_t lock;
> > + rwlock_t lock;
> > struct radix_tree_root tree;
> > struct mlx4_icm_table table;
> > struct mlx4_icm_table cmpt_table;
> >
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ.
[not found] ` <5458AFCA.8020309-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-11-04 11:32 ` Shachar Raindel
@ 2014-11-04 19:15 ` John Sobecki
1 sibling, 0 replies; 5+ messages in thread
From: John Sobecki @ 2014-11-04 19:15 UTC (permalink / raw)
To: Sagi Grimberg
Cc: Zheng Li, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
ogerlitz-VPRAkNaXOzVWk0Htik3J/w, yishaih-VPRAkNaXOzVWk0Htik3J/w,
jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb,
eli-VPRAkNaXOzVWk0Htik3J/w, joe.jin-QHcLZuEGTsvQT0dZR+AlfA,
guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA
[-- Attachment #1: Type: text/plain, Size: 7218 bytes --]
Hi Sagi,
We were also concerned about a performance impact, and had our ExaData team
run a perf regression test against the unpatched baseline kernel
2.6.39-400.128.17.el5uek. Results as follows (sorry about the wrapped lines).
Thanks, John
--------------------------------------
The performance run data look good. Here is the comparison between 128.17 and
128.19 (with bug fix for 19831407).
The performance using the two kernels is almost identical (with less than 1%
variation).
I am trying to test another case where one of the IB switches fails (it passed).
Thanks
Xumin
2.6.39-400.128.17.el5uek
runid tsks tx/s tx+rx-M/s mbi-M/s mko-M/s tx-us/c rtt-us Comp node CPU Cell
node CPU Comments
3588 112 1039892 508 0 8124 8.4 1716 6254 6876 8k write
3589 112 1081637 528 8450 0 6.8 1643 5587 7032 8K read
3590 112 11200 5 0 11213 113.2 159811 389 227 1M write
3591 112 12208 6 12234 0 51.1 146472 407 297 1M read
2.6.39-400.128.19.el5uek.2bug19831407V2
runid tsks tx/s tx+rx-M/s mbi-M/s mko-M/s tx-us/c rtt-us Comp node CPU Cell
node CPU Comments
3597 112 1054032 515 0 8235 8.4 1689 6030 6796 8k write
3598 112 1077019 526 8414 0 6.6 1650 5239 6798 8K read
3599 112 11256 5 0 11260 96.1 159138 367 217 1M write
3600 112 12220 6 12234 0 47.1 146474 359 212 1M read
On 11/04/2014 03:51 AM, Sagi Grimberg wrote:
> On 11/4/2014 11:23 AM, Zheng Li wrote:
>> The mlx4_ib_cq_comp dispatching was racy and this is fixed by using
>> rcu locking. However, that introduced regression in ib port failure
>> tests, so changing to rwlock.
>> rcu_XXX api are used to synchronize threads and use preempt_disable/enable
>> to make sure that no thread cross other thread but is not safe for interrupts
>> routines. In this case we are in interrupt context: mlx4_msi_x_interrupt ->
>> mlx4_eq_int -> mlx4_cq_completion, so can't use rcu_XXX to protect this code.
>
> Did you check the performance (latency) implications of grabbing a
> read_lock in the hot path?
>
> If I understand correctly, mlx4_cq_free is racing against
> mlx4_cq_completion. What about dis-arming the cq (assuming that it is possible)
> after calling synchronize_irq()?
>
> I'm just not very happy with acquiring a lock (even a read_lock) in
> such a critical path.
>
>>
>> The stack is below:
>> crash> bt
>> PID: 32068 TASK: ffff880ed14405c0 CPU: 20 COMMAND: "kworker/u:1"
>> #0 [ffff880ec7c01a00] __schedule at ffffffff81506664
>> #1 [ffff880ec7c01b18] schedule at ffffffff81506d45
>> #2 [ffff880ec7c01b28] schedule_timeout at ffffffff8150719c
>> #3 [ffff880ec7c01bd8] wait_for_common at ffffffff81506bcd
>> #4 [ffff880ec7c01c68] wait_for_completion at ffffffff81506cfd
>> #5 [ffff880ec7c01c78] synchronize_sched at ffffffff810dda48
>> #6 [ffff880ec7c01cc8] mlx4_cq_free at ffffffffa027e67e [mlx4_core]
>> #7 [ffff880ec7c01cf8] mlx4_ib_destroy_cq at ffffffffa03238a3 [mlx4_ib]
>> #8 [ffff880ec7c01d18] ib_destroy_cq at ffffffffa0301c9e [ib_core]
>> #9 [ffff880ec7c01d28] rds_ib_conn_shutdown at ffffffffa041cf5d [rds_rdma]
>> #10 [ffff880ec7c01dd8] rds_conn_shutdown at ffffffffa02b7f18 [rds]
>> #11 [ffff880ec7c01e38] rds_shutdown_worker at ffffffffa02bd14a [rds]
>> #12 [ffff880ec7c01e58] process_one_work at ffffffff8108c3b9
>> #13 [ffff880ec7c01ea8] worker_thread at ffffffff8108ccfa
>> #14 [ffff880ec7c01ee8] kthread at ffffffff810912d7
>> #15 [ffff880ec7c01f48] kernel_thread_helper at ffffffff815123c4
>>
>> Signed-off-by: Zheng Li <zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> Signed-off-by: Joe Jin <joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> Signed-off-by: Guru <guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> Signed-off-by: John Sobecki <john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>> ---
>> drivers/net/ethernet/mellanox/mlx4/cq.c | 26 +++++++++++++++-----------
>> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +-
>> 2 files changed, 16 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c
>> b/drivers/net/ethernet/mellanox/mlx4/cq.c
>> index 56022d6..8b3b849 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/cq.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
>> @@ -54,15 +54,19 @@
>>
>> void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
>> {
>> + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
>> struct mlx4_cq *cq;
>>
>> - cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
>> - cqn & (dev->caps.num_cqs - 1));
>> + read_lock(&cq_table->lock);
>> +
>> + cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
>> if (!cq) {
>> mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
>> return;
>> }
>>
>> + read_unlock(&cq_table->lock);
>> +
>> ++cq->arm_sn;
>>
>> cq->comp(cq);
>> @@ -73,13 +77,13 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int
>> event_type)
>> struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
>> struct mlx4_cq *cq;
>>
>> - spin_lock(&cq_table->lock);
>> + read_lock(&cq_table->lock);
>>
>> cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
>> if (cq)
>> atomic_inc(&cq->refcount);
>>
>> - spin_unlock(&cq_table->lock);
>> + read_unlock(&cq_table->lock);
>>
>> if (!cq) {
>> mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
>> @@ -256,9 +260,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
>> if (err)
>> return err;
>>
>> - spin_lock_irq(&cq_table->lock);
>> + write_lock_irq(&cq_table->lock);
>> err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
>> - spin_unlock_irq(&cq_table->lock);
>> + write_unlock_irq(&cq_table->lock);
>> if (err)
>> goto err_icm;
>>
>> @@ -297,9 +301,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
>> return 0;
>>
>> err_radix:
>> - spin_lock_irq(&cq_table->lock);
>> + write_lock_irq(&cq_table->lock);
>> radix_tree_delete(&cq_table->tree, cq->cqn);
>> - spin_unlock_irq(&cq_table->lock);
>> + write_unlock_irq(&cq_table->lock);
>>
>> err_icm:
>> mlx4_cq_free_icm(dev, cq->cqn);
>> @@ -320,9 +324,9 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
>>
>> synchronize_irq(priv->eq_table.eq[cq->vector].irq);
>>
>> - spin_lock_irq(&cq_table->lock);
>> + write_lock_irq(&cq_table->lock);
>> radix_tree_delete(&cq_table->tree, cq->cqn);
>> - spin_unlock_irq(&cq_table->lock);
>> + write_unlock_irq(&cq_table->lock);
>>
>> if (atomic_dec_and_test(&cq->refcount))
>> complete(&cq->free);
>> @@ -337,7 +341,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
>> struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
>> int err;
>>
>> - spin_lock_init(&cq_table->lock);
>> + rwlock_init(&cq_table->lock);
>> INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
>> if (mlx4_is_slave(dev))
>> return 0;
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>> b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>> index de10dbb..42e2348 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>> @@ -642,7 +642,7 @@ struct mlx4_mr_table {
>>
>> struct mlx4_cq_table {
>> struct mlx4_bitmap bitmap;
>> - spinlock_t lock;
>> + rwlock_t lock;
>> struct radix_tree_root tree;
>> struct mlx4_icm_table table;
>> struct mlx4_icm_table cmpt_table;
>>
>
[-- Attachment #2: john_sobecki.vcf --]
[-- Type: text/x-vcard, Size: 152 bytes --]
begin:vcard
fn:John Sobecki
n:Sobecki;John
org:Oracle Linux Kernel Engineering
tel;work:+1 719 757 3705
tel;cell:+1 719 332 1981
version:2.1
end:vcard
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ.
[not found] ` <0d09d3ae52134cf4bfe4e8554eb34185-LOZWmgKjnYgQouBfZGh8ttqRiQSDpxhJvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2014-11-05 8:47 ` zheng.li
0 siblings, 0 replies; 5+ messages in thread
From: zheng.li @ 2014-11-05 8:47 UTC (permalink / raw)
To: Shachar Raindel
Cc: Sagi Grimberg, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Or Gerlitz, Yishai Hadas,
jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org,
Eli Cohen, joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org
于 2014年11月04日 19:32, Shachar Raindel 写道:
>
>> -----Original Message-----
>> From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-
>> owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Sagi Grimberg
>> Sent: Tuesday, November 04, 2014 12:52 PM
>> To: Zheng Li; linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; Or Gerlitz; Yishai Hadas;
>> jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org; Eli Cohen
>> Cc: joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org; john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org;
>> guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org
>> Subject: Re: [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ.
>>
>> On 11/4/2014 11:23 AM, Zheng Li wrote:
>>> The mlx4_ib_cq_comp dispatching was racy and this is fixed by using
>>> rcu locking. However, that introduced regression in ib port failure
>>> tests, so changing to rwlock.
>>> rcu_XXX api are used to synchronize threads and use
>> preempt_disable/enable
>>> to make sure that no thread crosses another thread, but it is not safe for
>> interrupts
>>> routines. In this case we are in interrupt context:
>> mlx4_msi_x_interrupt ->
>>> mlx4_eq_int -> mlx4_cq_completion, so can't use rcu_XXX to protect
>> this code.
>>
>> Did you check the performance (latency) implications of grabbing a
>> read_lock in the hot path?
>>
>> If I understand correctly, mlx4_cq_free is racing against
>> mlx4_cq_completion. What about dis-arming the cq (assuming that it is
>> possible) after calling synchronize_irq()?
>>
>> I'm just not very happy with acquiring a lock (even a read_lock) in
>> such a critical path.
>>
>
> There are 2 separate issues here:
>
> a. The hot path in the linux kernel is currently missing a lock. The
> following race can occur with the current code:
>
> Thread 1 | Thread 2
> -----------------------------+----------------------------------------
> mlx4_cq_free(cq_1) | mlx4_cq_completion(cq_2)
> | radix_tree_lookup(cq_2) (in progress)
> radix_tree_delete(cq_1) | (traversal is still ongoing)
> free some radix tree nodes|
> | access free'd memory
>
> For this issue, there are 3 possible fixes:
> 1. The patch discussed here, performing an explicit read locking on the
> hot path and risking scalability issues due to cache line bouncing.
> 2. Similar, spin_lock based approach proposed 2 years ago:
> http://marc.info/?t=134632316600001&r=1&w=2
> 3. A solution using RCU, included in Mellanox OFED.
Hi Shachar,
Our original patch is based on "3. A solution using RCU, included in
Mellanox OFED"; our kernel merged it with RCU, and we then found the hang
issue in free_cq(), so we made the patch with rwlock instead of RCU to
fix the hang problem. I pulled the latest upstream code, made a patch
based on it, and sent it to linux-rdma for review.
>
> b. The issue causing the stack trace below. Note that the stack
> trace is not referring the upstream code, as there is no place in the
> current mlx4_cq_free code that synchronizes the RCU. This indicates
> that the bug report is referring to the Mellanox OFED version of this
> code. However, the stack trace does not explain the regression
> described above. Can you please provide:
> - The exact details of the code base you are referring to in the report?
> - A better description of the issue - you can use rcu_read_lock from
> interrupt context, so something else is going on here.
Yes, our original patch is based on the source code which merges "A
solution using RCU, included in Mellanox OFED". In order to send it to
linux-rdma, we made a patch based on the latest upstream code and sent
it for review.
>
>
>>>
>>> The stack is below:
>>> crash> bt
>>> PID: 32068 TASK: ffff880ed14405c0 CPU: 20 COMMAND: "kworker/u:1"
>>> #0 [ffff880ec7c01a00] __schedule at ffffffff81506664
>>> #1 [ffff880ec7c01b18] schedule at ffffffff81506d45
>>> #2 [ffff880ec7c01b28] schedule_timeout at ffffffff8150719c
>>> #3 [ffff880ec7c01bd8] wait_for_common at ffffffff81506bcd
>>> #4 [ffff880ec7c01c68] wait_for_completion at ffffffff81506cfd
>>> #5 [ffff880ec7c01c78] synchronize_sched at ffffffff810dda48
>>> #6 [ffff880ec7c01cc8] mlx4_cq_free at ffffffffa027e67e [mlx4_core]
>>> #7 [ffff880ec7c01cf8] mlx4_ib_destroy_cq at ffffffffa03238a3
>> [mlx4_ib]
>>> #8 [ffff880ec7c01d18] ib_destroy_cq at ffffffffa0301c9e [ib_core]
>>> #9 [ffff880ec7c01d28] rds_ib_conn_shutdown at ffffffffa041cf5d
>> [rds_rdma]
>>> #10 [ffff880ec7c01dd8] rds_conn_shutdown at ffffffffa02b7f18 [rds]
>>> #11 [ffff880ec7c01e38] rds_shutdown_worker at ffffffffa02bd14a [rds]
>>> #12 [ffff880ec7c01e58] process_one_work at ffffffff8108c3b9
>>> #13 [ffff880ec7c01ea8] worker_thread at ffffffff8108ccfa
>>> #14 [ffff880ec7c01ee8] kthread at ffffffff810912d7
>>> #15 [ffff880ec7c01f48] kernel_thread_helper at ffffffff815123c4
>>>
>>> Signed-off-by: Zheng Li <zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>>> Signed-off-by: Joe Jin <joe.jin-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>>> Signed-off-by: Guru <guru.anbalagane-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>>> Signed-off-by: John Sobecki <john.sobecki-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>>> ---
>>> drivers/net/ethernet/mellanox/mlx4/cq.c | 26 +++++++++++++++----
>> -------
>>> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +-
>>> 2 files changed, 16 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c
>> b/drivers/net/ethernet/mellanox/mlx4/cq.c
>>> index 56022d6..8b3b849 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx4/cq.c
>>> +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
>>> @@ -54,15 +54,19 @@
>>>
>>> void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
>>> {
>>> + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
>>> struct mlx4_cq *cq;
>>>
>>> - cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
>>> - cqn & (dev->caps.num_cqs - 1));
>>> + read_lock(&cq_table->lock);
>>> +
>>> + cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs -
>> 1));
>>> if (!cq) {
>>> mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
>>> return;
>>> }
>>>
>>> + read_unlock(&cq_table->lock);
>>> +
>>> ++cq->arm_sn;
>>>
>>> cq->comp(cq);
>>> @@ -73,13 +77,13 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn,
>> int event_type)
>>> struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
>>> struct mlx4_cq *cq;
>>>
>>> - spin_lock(&cq_table->lock);
>>> + read_lock(&cq_table->lock);
>>>
>>> cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs -
>> 1));
>>> if (cq)
>>> atomic_inc(&cq->refcount);
>>>
>>> - spin_unlock(&cq_table->lock);
>>> + read_unlock(&cq_table->lock);
>>>
>>> if (!cq) {
>>> mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
>>> @@ -256,9 +260,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
>>> if (err)
>>> return err;
>>>
>>> - spin_lock_irq(&cq_table->lock);
>>> + write_lock_irq(&cq_table->lock);
>>> err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
>>> - spin_unlock_irq(&cq_table->lock);
>>> + write_unlock_irq(&cq_table->lock);
>>> if (err)
>>> goto err_icm;
>>>
>>> @@ -297,9 +301,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
>>> return 0;
>>>
>>> err_radix:
>>> - spin_lock_irq(&cq_table->lock);
>>> + write_lock_irq(&cq_table->lock);
>>> radix_tree_delete(&cq_table->tree, cq->cqn);
>>> - spin_unlock_irq(&cq_table->lock);
>>> + write_unlock_irq(&cq_table->lock);
>>>
>>> err_icm:
>>> mlx4_cq_free_icm(dev, cq->cqn);
>>> @@ -320,9 +324,9 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct
>> mlx4_cq *cq)
>>>
>>> synchronize_irq(priv->eq_table.eq[cq->vector].irq);
>>>
>>> - spin_lock_irq(&cq_table->lock);
>>> + write_lock_irq(&cq_table->lock);
>>> radix_tree_delete(&cq_table->tree, cq->cqn);
>>> - spin_unlock_irq(&cq_table->lock);
>>> + write_unlock_irq(&cq_table->lock);
>>>
>>> if (atomic_dec_and_test(&cq->refcount))
>>> complete(&cq->free);
>>> @@ -337,7 +341,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
>>> struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
>>> int err;
>>>
>>> - spin_lock_init(&cq_table->lock);
>>> + rwlock_init(&cq_table->lock);
>>> INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
>>> if (mlx4_is_slave(dev))
>>> return 0;
>>> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>> b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>>> index de10dbb..42e2348 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>>> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
>>> @@ -642,7 +642,7 @@ struct mlx4_mr_table {
>>>
>>> struct mlx4_cq_table {
>>> struct mlx4_bitmap bitmap;
>>> - spinlock_t lock;
>>> + rwlock_t lock;
>>> struct radix_tree_root tree;
>>> struct mlx4_icm_table table;
>>> struct mlx4_icm_table cmpt_table;
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-11-05 8:47 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-11-04 9:23 [PATCH] net/mlx4_core: Convert rcu locking to rwlock in CQ Zheng Li
[not found] ` <1415093024-13041-1-git-send-email-zheng.x.li-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2014-11-04 10:51 ` Sagi Grimberg
[not found] ` <5458AFCA.8020309-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2014-11-04 11:32 ` Shachar Raindel
[not found] ` <0d09d3ae52134cf4bfe4e8554eb34185-LOZWmgKjnYgQouBfZGh8ttqRiQSDpxhJvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2014-11-05 8:47 ` zheng.li
2014-11-04 19:15 ` John Sobecki
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox