From: Sebastian Gottschall <s.gottschall@dd-wrt.com>
To: ath10k@lists.infradead.org
Subject: Re: [PATCH 2/2] ath10k: do not use coherent memory for tx buffers
Date: Mon, 23 Nov 2015 19:02:03 +0100 [thread overview]
Message-ID: <5653549B.8090503@dd-wrt.com> (raw)
In-Reply-To: <56534C01.8030407@codeaurora.org>
Am 23.11.2015 um 18:25 schrieb Peter Oh:
> Hi,
>
> Have you measured the peak throughput?
> The pre-allocated coherent memory concept was introduced as once of
> peak throughput improvement.
> IIRC, dma_map_single takes about 4 us on Cortex A7 and
> dma_unmap_single also takes time to invalid cache.
> Please share your tput number before and after, so I don't need to
> worry about performance degrade.
yes. and this concept fucks up the qualcom ipq806x platform (which has
by default 2 QCA99XX cards). it does not work, since the preallocated
concept allocates too much memory which is not available
in dma space on that platform.
thanks
Sebastian
>
> Thanks,
> Peter
>
> On 11/23/2015 05:18 AM, Felix Fietkau wrote:
>> Coherent memory is expensive to access, since all memory accesses bypass
>> the cache. It is also completely unnecessary for this case.
>> Convert to mapped memory instead and use the DMA API to flush the cache
>> where necessary.
>> Fixes allocation failures on embedded devices.
>>
>> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
>> ---
>> drivers/net/wireless/ath/ath10k/htt_tx.c | 77
>> +++++++++++++++++++++-----------
>> 1 file changed, 51 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c
>> b/drivers/net/wireless/ath/ath10k/htt_tx.c
>> index 8f76b9d..99d9793 100644
>> --- a/drivers/net/wireless/ath/ath10k/htt_tx.c
>> +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
>> @@ -100,7 +100,7 @@ void ath10k_htt_tx_free_msdu_id(struct ath10k_htt
>> *htt, u16 msdu_id)
>> int ath10k_htt_tx_alloc(struct ath10k_htt *htt)
>> {
>> struct ath10k *ar = htt->ar;
>> - int ret, size;
>> + int size;
>> ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt tx max num pending tx
>> %d\n",
>> htt->max_num_pending_tx);
>> @@ -109,39 +109,41 @@ int ath10k_htt_tx_alloc(struct ath10k_htt *htt)
>> idr_init(&htt->pending_tx);
>> size = htt->max_num_pending_tx * sizeof(struct
>> ath10k_htt_txbuf);
>> - htt->txbuf.vaddr = dma_alloc_coherent(ar->dev, size,
>> - &htt->txbuf.paddr,
>> - GFP_DMA);
>> - if (!htt->txbuf.vaddr) {
>> - ath10k_err(ar, "failed to alloc tx buffer\n");
>> - ret = -ENOMEM;
>> + htt->txbuf.vaddr = kzalloc(size, GFP_KERNEL);
>> + if (!htt->txbuf.vaddr)
>> goto free_idr_pending_tx;
>> - }
>> +
>> + htt->txbuf.paddr = dma_map_single(ar->dev, htt->txbuf.vaddr, size,
>> + DMA_TO_DEVICE);
>> + if (dma_mapping_error(ar->dev, htt->txbuf.paddr))
>> + goto free_txbuf_vaddr;
>> if (!ar->hw_params.continuous_frag_desc)
>> - goto skip_frag_desc_alloc;
>> + return 0;
>> size = htt->max_num_pending_tx * sizeof(struct
>> htt_msdu_ext_desc);
>> - htt->frag_desc.vaddr = dma_alloc_coherent(ar->dev, size,
>> - &htt->frag_desc.paddr,
>> - GFP_DMA);
>> - if (!htt->frag_desc.vaddr) {
>> - ath10k_warn(ar, "failed to alloc fragment desc memory\n");
>> - ret = -ENOMEM;
>> + htt->frag_desc.vaddr = kzalloc(size, GFP_KERNEL);
>> + if (!htt->frag_desc.vaddr)
>> goto free_txbuf;
>> - }
>> -skip_frag_desc_alloc:
>> + htt->frag_desc.paddr = dma_map_single(ar->dev,
>> htt->frag_desc.vaddr,
>> + size, DMA_TO_DEVICE);
>> + if (dma_mapping_error(ar->dev, htt->txbuf.paddr))
>> + goto free_frag_desc;
>> +
>> return 0;
>> +free_frag_desc:
>> + kfree(htt->frag_desc.vaddr);
>> free_txbuf:
>> size = htt->max_num_pending_tx *
>> sizeof(struct ath10k_htt_txbuf);
>> - dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr,
>> - htt->txbuf.paddr);
>> + dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size,
>> DMA_TO_DEVICE);
>> +free_txbuf_vaddr:
>> + kfree(htt->txbuf.vaddr);
>> free_idr_pending_tx:
>> idr_destroy(&htt->pending_tx);
>> - return ret;
>> + return -ENOMEM;
>> }
>> static int ath10k_htt_tx_clean_up_pending(int msdu_id, void *skb,
>> void *ctx)
>> @@ -170,15 +172,17 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt)
>> if (htt->txbuf.vaddr) {
>> size = htt->max_num_pending_tx *
>> sizeof(struct ath10k_htt_txbuf);
>> - dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr,
>> - htt->txbuf.paddr);
>> + dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size,
>> + DMA_TO_DEVICE);
>> + kfree(htt->txbuf.vaddr);
>> }
>> if (htt->frag_desc.vaddr) {
>> size = htt->max_num_pending_tx *
>> sizeof(struct htt_msdu_ext_desc);
>> - dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr,
>> - htt->frag_desc.paddr);
>> + dma_unmap_single(htt->ar->dev, htt->frag_desc.paddr, size,
>> + DMA_TO_DEVICE);
>> + kfree(htt->frag_desc.vaddr);
>> }
>> }
>> @@ -550,6 +554,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt,
>> struct sk_buff *msdu)
>> struct htt_msdu_ext_desc *ext_desc = NULL;
>> bool limit_mgmt_desc = false;
>> bool is_probe_resp = false;
>> + int txbuf_offset, frag_offset, frag_size;
>> if (unlikely(ieee80211_is_mgmt(hdr->frame_control)) &&
>> ar->hw_params.max_probe_resp_desc_thres) {
>> @@ -574,9 +579,11 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct
>> sk_buff *msdu)
>> prefetch_len = min(htt->prefetch_len, msdu->len);
>> prefetch_len = roundup(prefetch_len, 4);
>> + frag_size = sizeof(struct htt_msdu_ext_desc);
>> + frag_offset = frag_size * msdu_id;
>> + txbuf_offset = sizeof(struct ath10k_htt_txbuf) * msdu_id;
>> skb_cb->htt.txbuf = &htt->txbuf.vaddr[msdu_id];
>> - skb_cb->htt.txbuf_paddr = htt->txbuf.paddr +
>> - (sizeof(struct ath10k_htt_txbuf) * msdu_id);
>> + skb_cb->htt.txbuf_paddr = htt->txbuf.paddr + txbuf_offset;
>> if ((ieee80211_is_action(hdr->frame_control) ||
>> ieee80211_is_deauth(hdr->frame_control) ||
>> @@ -597,6 +604,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct
>> sk_buff *msdu)
>> goto err_free_msdu_id;
>> }
>> + dma_sync_single_range_for_cpu(dev, htt->txbuf.paddr,
>> txbuf_offset,
>> + sizeof(struct ath10k_htt_txbuf),
>> + DMA_TO_DEVICE);
>> +
>> + if (ar->hw_params.continuous_frag_desc)
>> + dma_sync_single_range_for_cpu(dev, htt->frag_desc.paddr,
>> + frag_offset, frag_size,
>> + DMA_TO_DEVICE);
>> +
>> switch (skb_cb->txmode) {
>> case ATH10K_HW_TXRX_RAW:
>> case ATH10K_HW_TXRX_NATIVE_WIFI:
>> @@ -723,6 +739,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct
>> sk_buff *msdu)
>> sg_items[1].paddr = skb_cb->paddr;
>> sg_items[1].len = prefetch_len;
>> + if (ar->hw_params.continuous_frag_desc)
>> + dma_sync_single_range_for_device(dev, htt->frag_desc.paddr,
>> + frag_offset, frag_size,
>> + DMA_TO_DEVICE);
>> +
>> + dma_sync_single_range_for_device(dev, htt->txbuf.paddr,
>> txbuf_offset,
>> + sizeof(struct ath10k_htt_txbuf),
>> + DMA_TO_DEVICE);
>> +
>> res = ath10k_hif_tx_sg(htt->ar,
>> htt->ar->htc.endpoint[htt->eid].ul_pipe_id,
>> sg_items, ARRAY_SIZE(sg_items));
>
>
> _______________________________________________
> ath10k mailing list
> ath10k@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/ath10k
>
_______________________________________________
ath10k mailing list
ath10k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath10k
next prev parent reply other threads:[~2015-11-23 18:02 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-23 13:18 [PATCH 1/2] ath10k: do not use coherent memory for allocated device memory chunks Felix Fietkau
2015-11-23 13:18 ` [PATCH 2/2] ath10k: do not use coherent memory for tx buffers Felix Fietkau
2015-11-23 17:25 ` Peter Oh
2015-11-23 18:02 ` Sebastian Gottschall [this message]
2015-11-23 18:25 ` Peter Oh
2015-11-23 18:34 ` Felix Fietkau
2015-11-23 18:18 ` Felix Fietkau
2015-11-23 18:50 ` Peter Oh
2015-11-24 10:32 ` Felix Fietkau
2015-11-25 10:37 ` Kalle Valo
2015-11-30 10:29 ` [PATCH 1/2] ath10k: do not use coherent memory for allocated device memory chunks Kalle Valo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5653549B.8090503@dd-wrt.com \
--to=s.gottschall@dd-wrt.com \
--cc=ath10k@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox