From: Hemant Kumar <hemantk@codeaurora.org>
To: Loic Poulain <loic.poulain@linaro.org>, kuba@kernel.org
Cc: manivannan.sadhasivam@linaro.org, linux-arm-msm@vger.kernel.org,
netdev@vger.kernel.org, davem@davemloft.net
Subject: Re: [PATCH 3/3] net: mhi: Add dedicated alloc thread
Date: Wed, 9 Dec 2020 11:00:53 -0800 [thread overview]
Message-ID: <9b4aa706-ce32-0dff-f6d6-28e2e394783e@codeaurora.org> (raw)
In-Reply-To: <1607526183-25652-3-git-send-email-loic.poulain@linaro.org>
On 12/9/20 7:03 AM, Loic Poulain wrote:
> The buffer allocation for RX path is currently done by a work executed
> in the system workqueue. The work to do is quite simple and consists
> mostly in allocating and queueing as much as possible buffers to the MHI
queuing
> RX channel.
>
> It appears that using a dedicated kthread would be more appropriate to
> prevent
> 1. RX allocation latency introduced by the system queue
> 2. Unbounded work execution, the work only returning when queue is
> full, it can possibly monopolise the workqueue thread on slower systems.
>
> This patch replaces the system work with a simple kthread that loops on
> buffer allocation and sleeps when queue is full. Moreover it gets rid
> of the local rx_queued variable (to track buffer count), and instead,
> relies on the new mhi_get_free_desc_count helper.
>
> After pratical testing on a x86_64 machine, this change improves
practical ?
> - Peek throughput (slightly, by few mbps)
> - Throughput stability when concurrent loads are running (stress)
> - CPU usage, less CPU cycles dedicated to the task
>
> Below is the powertop output for RX allocation task before and after
> this change, when performing UDP download at 6Gbps. Mostly to highlight
> the improvement in term of CPU usage.
>
> older (system workqueue):
> Usage Events/s Category Description
> 63,2 ms/s 134,0 kWork mhi_net_rx_refill_work
> 62,8 ms/s 134,3 kWork mhi_net_rx_refill_work
> 60,8 ms/s 141,4 kWork mhi_net_rx_refill_work
>
> newer (dedicated kthread):
> Usage Events/s Category Description
> 20,7 ms/s 155,6 Process [PID 3360] [mhi-net-rx]
> 22,2 ms/s 169,6 Process [PID 3360] [mhi-net-rx]
> 22,3 ms/s 150,2 Process [PID 3360] [mhi-net-rx]
>
> Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
> ---
> drivers/net/mhi_net.c | 98 ++++++++++++++++++++++++++-------------------------
> 1 file changed, 50 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c
> index 0333e07..eef40f5 100644
> --- a/drivers/net/mhi_net.c
> +++ b/drivers/net/mhi_net.c
> @@ -5,6 +5,7 @@
> */
>
> #include <linux/if_arp.h>
> +#include <linux/kthread.h>
> #include <linux/mhi.h>
> #include <linux/mod_devicetable.h>
> #include <linux/module.h>
> @@ -25,7 +26,6 @@ struct mhi_net_stats {
> u64_stats_t tx_bytes;
> u64_stats_t tx_errors;
> u64_stats_t tx_dropped;
> - atomic_t rx_queued;
> struct u64_stats_sync tx_syncp;
> struct u64_stats_sync rx_syncp;
> };
> @@ -33,17 +33,59 @@ struct mhi_net_stats {
> struct mhi_net_dev {
> struct mhi_device *mdev;
> struct net_device *ndev;
> - struct delayed_work rx_refill;
> + struct task_struct *refill_task;
> + wait_queue_head_t refill_wq;
> struct mhi_net_stats stats;
> u32 rx_queue_sz;
> };
>
> +static int mhi_net_refill_thread(void *data)
> +{
> + struct mhi_net_dev *mhi_netdev = data;
> + struct net_device *ndev = mhi_netdev->ndev;
> + struct mhi_device *mdev = mhi_netdev->mdev;
> + int size = READ_ONCE(ndev->mtu);
> + struct sk_buff *skb;
> + int err;
> +
> + while (1) {
> + err = wait_event_interruptible(mhi_netdev->refill_wq,
> + !mhi_queue_is_full(mdev, DMA_FROM_DEVICE)
> + || kthread_should_stop());
> + if (err || kthread_should_stop())
> + break;
> +
> + skb = netdev_alloc_skb(ndev, size);
> + if (unlikely(!skb)) {
> + /* No memory, retry later */
> + schedule_timeout_interruptible(msecs_to_jiffies(250));
> + continue;
> + }
> +
> + err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
> + if (unlikely(err)) {
> + net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
> + ndev->name, err);
> + kfree_skb(skb);
> + break;
> + }
> +
> + /* Do not hog the CPU */
> + cond_resched();
> + }
> +
> + return 0;
> +}
> +
> static int mhi_ndo_open(struct net_device *ndev)
> {
> struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
>
> - /* Feed the rx buffer pool */
> - schedule_delayed_work(&mhi_netdev->rx_refill, 0);
> + mhi_netdev->refill_task = kthread_run(mhi_net_refill_thread, mhi_netdev,
> + "mhi-net-rx");
> + if (IS_ERR(mhi_netdev->refill_task)) {
nit: you can remove curly brace for single statement
> + return PTR_ERR(mhi_netdev->refill_task);
> + }
>
> /* Carrier is established via out-of-band channel (e.g. qmi) */
> netif_carrier_on(ndev);
> @@ -57,9 +99,9 @@ static int mhi_ndo_stop(struct net_device *ndev)
> {
> struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
>
> + kthread_stop(mhi_netdev->refill_task);
> netif_stop_queue(ndev);
> netif_carrier_off(ndev);
> - cancel_delayed_work_sync(&mhi_netdev->rx_refill);
>
> return 0;
> }
> @@ -138,9 +180,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
> {
> struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
> struct sk_buff *skb = mhi_res->buf_addr;
> - int remaining;
> -
> - remaining = atomic_dec_return(&mhi_netdev->stats.rx_queued);
>
> if (unlikely(mhi_res->transaction_status)) {
> dev_kfree_skb_any(skb);
> @@ -163,9 +202,8 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
> netif_rx(skb);
> }
>
> - /* Refill if RX buffers queue becomes low */
> - if (remaining <= mhi_netdev->rx_queue_sz / 2)
> - schedule_delayed_work(&mhi_netdev->rx_refill, 0);
> + if (mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE) >= mhi_netdev->rx_queue_sz / 3)
would it be helpful to add a module param instead of rx_queue_sz/3,
which can help to fine tune when you wake up kthread at run time.
Comparing to the value used last used now you are dividing by 3.
> + wake_up_interruptible(&mhi_netdev->refill_wq);
> }
>
> static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
> @@ -200,42 +238,6 @@ static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
> netif_wake_queue(ndev);
> }
>
> -static void mhi_net_rx_refill_work(struct work_struct *work)
> -{
> - struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
> - rx_refill.work);
> - struct net_device *ndev = mhi_netdev->ndev;
> - struct mhi_device *mdev = mhi_netdev->mdev;
> - int size = READ_ONCE(ndev->mtu);
> - struct sk_buff *skb;
> - int err;
> -
> - while (atomic_read(&mhi_netdev->stats.rx_queued) < mhi_netdev->rx_queue_sz) {
> - skb = netdev_alloc_skb(ndev, size);
> - if (unlikely(!skb))
> - break;
> -
> - err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
> - if (unlikely(err)) {
> - net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
> - ndev->name, err);
> - kfree_skb(skb);
> - break;
> - }
> -
> - atomic_inc(&mhi_netdev->stats.rx_queued);
> -
> - /* Do not hog the CPU if rx buffers are consumed faster than
> - * queued (unlikely).
> - */
> - cond_resched();
> - }
> -
> - /* If we're still starved of rx buffers, reschedule later */
> - if (unlikely(!atomic_read(&mhi_netdev->stats.rx_queued)))
> - schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
> -}
> -
> static int mhi_net_probe(struct mhi_device *mhi_dev,
> const struct mhi_device_id *id)
> {
> @@ -256,7 +258,7 @@ static int mhi_net_probe(struct mhi_device *mhi_dev,
> mhi_netdev->mdev = mhi_dev;
> SET_NETDEV_DEV(ndev, &mhi_dev->dev);
>
> - INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
> + init_waitqueue_head(&mhi_netdev->refill_wq);
> u64_stats_init(&mhi_netdev->stats.rx_syncp);
> u64_stats_init(&mhi_netdev->stats.tx_syncp);
>
>
Thanks,
Hemant
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
prev parent reply other threads:[~2020-12-09 19:01 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-09 15:03 [PATCH 1/3] bus: mhi: core: Add helper API to return number of free TREs Loic Poulain
2020-12-09 15:03 ` [PATCH 2/3] net: mhi: Get RX queue size from MHI core Loic Poulain
2020-12-09 15:46 ` Jeffrey Hugo
2020-12-09 15:03 ` [PATCH 3/3] net: mhi: Add dedicated alloc thread Loic Poulain
2020-12-09 19:00 ` Hemant Kumar [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9b4aa706-ce32-0dff-f6d6-28e2e394783e@codeaurora.org \
--to=hemantk@codeaurora.org \
--cc=davem@davemloft.net \
--cc=kuba@kernel.org \
--cc=linux-arm-msm@vger.kernel.org \
--cc=loic.poulain@linaro.org \
--cc=manivannan.sadhasivam@linaro.org \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).