public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Junxian Huang <huangjunxian6@hisilicon.com>,
	jgg@ziepe.ca, leon@kernel.org
Cc: linux-rdma@vger.kernel.org, linuxarm@huawei.com,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH for-rc 3/9] RDMA/hns: Fix soft lockup under heavy CEQE load
Date: Fri, 5 Jul 2024 18:47:56 +0800	[thread overview]
Message-ID: <aa0acabe-567a-45d9-ad0a-69e85e6c300a@linux.dev> (raw)
In-Reply-To: <20240705085937.1644229-4-huangjunxian6@hisilicon.com>

在 2024/7/5 16:59, Junxian Huang 写道:
> CEQEs are handled in interrupt handler currently. This may cause the
> CPU core staying in interrupt context too long and lead to soft lockup
> under heavy load.
> 
> Handle CEQEs in tasklet and set an upper limit for the number of CEQE
> handled by a single call of tasklet.

https://patchwork.kernel.org/project/linux-rdma/cover/20240621050525.3720069-1-allen.lkml@gmail.com/

In the above link, it seems that tasklet is not good enough. The tasklet 
is marked deprecated and has some design flaws. It is being replace BH 
workqueue.

So directly use workqueue instead of tasklet?

Zhu Yanjun

> 
> Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08")
> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
> ---
>   drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
>   drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 88 ++++++++++++---------
>   2 files changed, 53 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index 05005079258c..5a2445f357ab 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -717,6 +717,7 @@ struct hns_roce_eq {
>   	int				shift;
>   	int				event_type;
>   	int				sub_type;
> +	struct tasklet_struct		tasklet;
>   };
>   
>   struct hns_roce_eq_table {
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index ff135df1a761..f73de06a3ca5 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -6146,33 +6146,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
>   		!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
>   }
>   
> -static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
> -				       struct hns_roce_eq *eq)
> +static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq)
>   {
> -	struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
> -	irqreturn_t ceqe_found = IRQ_NONE;
> -	u32 cqn;
> -
> -	while (ceqe) {
> -		/* Make sure we read CEQ entry after we have checked the
> -		 * ownership bit
> -		 */
> -		dma_rmb();
> -
> -		cqn = hr_reg_read(ceqe, CEQE_CQN);
> -
> -		hns_roce_cq_completion(hr_dev, cqn);
> -
> -		++eq->cons_index;
> -		ceqe_found = IRQ_HANDLED;
> -		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
> -
> -		ceqe = next_ceqe_sw_v2(eq);
> -	}
> +	tasklet_schedule(&eq->tasklet);
>   
> -	update_eq_db(eq);
> -
> -	return IRQ_RETVAL(ceqe_found);
> +	return IRQ_HANDLED;
>   }
>   
>   static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
> @@ -6183,7 +6161,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
>   
>   	if (eq->type_flag == HNS_ROCE_CEQ)
>   		/* Completion event interrupt */
> -		int_work = hns_roce_v2_ceq_int(hr_dev, eq);
> +		int_work = hns_roce_v2_ceq_int(eq);
>   	else
>   		/* Asynchronous event interrupt */
>   		int_work = hns_roce_v2_aeq_int(hr_dev, eq);
> @@ -6551,6 +6529,34 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
>   	return ret;
>   }
>   
> +static void hns_roce_ceq_task(struct tasklet_struct *task)
> +{
> +	struct hns_roce_eq *eq = from_tasklet(eq, task, tasklet);
> +	struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
> +	struct hns_roce_dev *hr_dev = eq->hr_dev;
> +	int ceqe_num = 0;
> +	u32 cqn;
> +
> +	while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) {
> +		/* Make sure we read CEQ entry after we have checked the
> +		 * ownership bit
> +		 */
> +		dma_rmb();
> +
> +		cqn = hr_reg_read(ceqe, CEQE_CQN);
> +
> +		hns_roce_cq_completion(hr_dev, cqn);
> +
> +		++eq->cons_index;
> +		++ceqe_num;
> +		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
> +
> +		ceqe = next_ceqe_sw_v2(eq);
> +	}
> +
> +	update_eq_db(eq);
> +}
> +
>   static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
>   				  int comp_num, int aeq_num, int other_num)
>   {
> @@ -6582,21 +6588,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
>   			 j - other_num - aeq_num);
>   
>   	for (j = 0; j < irq_num; j++) {
> -		if (j < other_num)
> +		if (j < other_num) {
>   			ret = request_irq(hr_dev->irq[j],
>   					  hns_roce_v2_msix_interrupt_abn,
>   					  0, hr_dev->irq_names[j], hr_dev);
> -
> -		else if (j < (other_num + comp_num))
> +		} else if (j < (other_num + comp_num)) {
> +			tasklet_setup(&eq_table->eq[j - other_num].tasklet,
> +				      hns_roce_ceq_task);
>   			ret = request_irq(eq_table->eq[j - other_num].irq,
>   					  hns_roce_v2_msix_interrupt_eq,
>   					  0, hr_dev->irq_names[j + aeq_num],
>   					  &eq_table->eq[j - other_num]);
> -		else
> +		} else {
>   			ret = request_irq(eq_table->eq[j - other_num].irq,
>   					  hns_roce_v2_msix_interrupt_eq,
>   					  0, hr_dev->irq_names[j - comp_num],
>   					  &eq_table->eq[j - other_num]);
> +		}
> +
>   		if (ret) {
>   			dev_err(hr_dev->dev, "request irq error!\n");
>   			goto err_request_failed;
> @@ -6606,12 +6615,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
>   	return 0;
>   
>   err_request_failed:
> -	for (j -= 1; j >= 0; j--)
> -		if (j < other_num)
> +	for (j -= 1; j >= 0; j--) {
> +		if (j < other_num) {
>   			free_irq(hr_dev->irq[j], hr_dev);
> -		else
> -			free_irq(eq_table->eq[j - other_num].irq,
> -				 &eq_table->eq[j - other_num]);
> +			continue;
> +		}
> +		free_irq(eq_table->eq[j - other_num].irq,
> +			 &eq_table->eq[j - other_num]);
> +		if (j < other_num + comp_num)
> +			tasklet_kill(&eq_table->eq[j - other_num].tasklet);
> +	}
>   
>   err_kzalloc_failed:
>   	for (i -= 1; i >= 0; i--)
> @@ -6632,8 +6645,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
>   	for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
>   		free_irq(hr_dev->irq[i], hr_dev);
>   
> -	for (i = 0; i < eq_num; i++)
> +	for (i = 0; i < eq_num; i++) {
>   		free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
> +		if (i < hr_dev->caps.num_comp_vectors)
> +			tasklet_kill(&hr_dev->eq_table.eq[i].tasklet);
> +	}
>   
>   	for (i = 0; i < irq_num; i++)
>   		kfree(hr_dev->irq_names[i]);


  reply	other threads:[~2024-07-05 10:48 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-05  8:59 [PATCH for-rc 0/9] RDMA/hns: Bugfixes Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 1/9] RDMA/hns: Check atomic wr length Junxian Huang
2024-07-07  8:24   ` Leon Romanovsky
2024-07-08  2:27     ` Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 2/9] RDMA/hns: Fix a long wait for cmdq event during reset Junxian Huang
2024-07-07  8:30   ` Leon Romanovsky
2024-07-08  2:29     ` Junxian Huang
2024-07-08  5:38       ` Leon Romanovsky
2024-07-08  6:50         ` Junxian Huang
2024-07-08  7:33           ` Leon Romanovsky
2024-07-08  7:46             ` Junxian Huang
2024-07-08  8:27               ` Leon Romanovsky
2024-07-08  8:45                 ` Junxian Huang
2024-07-08  8:59                   ` Leon Romanovsky
2024-07-08  9:30                     ` Junxian Huang
2024-07-08 11:16                       ` Leon Romanovsky
2024-07-09  6:21                         ` Junxian Huang
2024-07-09  7:22                           ` Leon Romanovsky
2024-07-09  7:49                             ` Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 3/9] RDMA/hns: Fix soft lockup under heavy CEQE load Junxian Huang
2024-07-05 10:47   ` Zhu Yanjun [this message]
2024-07-08  2:30     ` Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 4/9] RDMA/hns: Fix unmatch exception handling when init eq table fails Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 5/9] RDMA/hns: Fix missing pagesize and alignment check in FRMR Junxian Huang
2024-07-07  9:16   ` Zhu Yanjun
2024-07-08  2:44     ` Junxian Huang
2024-07-08  5:41       ` Leon Romanovsky
2024-07-08  7:57       ` Zhu Yanjun
2024-07-08  8:33         ` Leon Romanovsky
2024-07-05  8:59 ` [PATCH for-rc 6/9] RDMA/hns: Fix shift-out-bounds when max_inline_data is 0 Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 7/9] RDMA/hns: Fix undifined behavior caused by invalid max_sge Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 8/9] RDMA/hns: Fix insufficient extend DB for VFs Junxian Huang
2024-07-05  8:59 ` [PATCH for-rc 9/9] RDMA/hns: Fix mbx timing out before CMD execution is completed Junxian Huang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aa0acabe-567a-45d9-ad0a-69e85e6c300a@linux.dev \
    --to=yanjun.zhu@linux.dev \
    --cc=huangjunxian6@hisilicon.com \
    --cc=jgg@ziepe.ca \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox