From mboxrd@z Thu Jan 1 00:00:00 1970 Return-path: Received: from nm.newmedia-net.de ([217.113.179.122] helo=webmail.newmedia-net.de) by bombadil.infradead.org with esmtps (Exim 4.80.1 #2 (Red Hat Linux)) id 1a0vRg-0004jt-Kk for ath10k@lists.infradead.org; Mon, 23 Nov 2015 18:02:34 +0000 References: <1448284729-98078-1-git-send-email-nbd@openwrt.org> <1448284729-98078-2-git-send-email-nbd@openwrt.org> <56534C01.8030407@codeaurora.org> From: Sebastian Gottschall Message-ID: <5653549B.8090503@dd-wrt.com> Date: Mon, 23 Nov 2015 19:02:03 +0100 MIME-Version: 1.0 In-Reply-To: <56534C01.8030407@codeaurora.org> Subject: Re: [PATCH 2/2] ath10k: do not use coherent memory for tx buffers List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="us-ascii"; Format="flowed" Sender: "ath10k" Errors-To: ath10k-bounces+kvalo=adurom.com@lists.infradead.org To: ath10k@lists.infradead.org Am 23.11.2015 um 18:25 schrieb Peter Oh: > Hi, > > Have you measured the peak throughput? > The pre-allocated coherent memory concept was introduced as once of > peak throughput improvement. > IIRC, dma_map_single takes about 4 us on Cortex A7 and > dma_unmap_single also takes time to invalid cache. > Please share your tput number before and after, so I don't need to > worry about performance degrade. yes. and this concept fucks up the qualcom ipq806x platform (which has by default 2 QCA99XX cards). it does not work, since the preallocated concept allocates too much memory which is not available in dma space on that platform. thanks Sebastian > > Thanks, > Peter > > On 11/23/2015 05:18 AM, Felix Fietkau wrote: >> Coherent memory is expensive to access, since all memory accesses bypass >> the cache. It is also completely unnecessary for this case. >> Convert to mapped memory instead and use the DMA API to flush the cache >> where necessary. >> Fixes allocation failures on embedded devices. >> >> Signed-off-by: Felix Fietkau >> --- >> drivers/net/wireless/ath/ath10k/htt_tx.c | 77 >> +++++++++++++++++++++----------- >> 1 file changed, 51 insertions(+), 26 deletions(-) >> >> diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c >> b/drivers/net/wireless/ath/ath10k/htt_tx.c >> index 8f76b9d..99d9793 100644 >> --- a/drivers/net/wireless/ath/ath10k/htt_tx.c >> +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c >> @@ -100,7 +100,7 @@ void ath10k_htt_tx_free_msdu_id(struct ath10k_htt >> *htt, u16 msdu_id) >> int ath10k_htt_tx_alloc(struct ath10k_htt *htt) >> { >> struct ath10k *ar = htt->ar; >> - int ret, size; >> + int size; >> ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt tx max num pending tx >> %d\n", >> htt->max_num_pending_tx); >> @@ -109,39 +109,41 @@ int ath10k_htt_tx_alloc(struct ath10k_htt *htt) >> idr_init(&htt->pending_tx); >> size = htt->max_num_pending_tx * sizeof(struct >> ath10k_htt_txbuf); >> - htt->txbuf.vaddr = dma_alloc_coherent(ar->dev, size, >> - &htt->txbuf.paddr, >> - GFP_DMA); >> - if (!htt->txbuf.vaddr) { >> - ath10k_err(ar, "failed to alloc tx buffer\n"); >> - ret = -ENOMEM; >> + htt->txbuf.vaddr = kzalloc(size, GFP_KERNEL); >> + if (!htt->txbuf.vaddr) >> goto free_idr_pending_tx; >> - } >> + >> + htt->txbuf.paddr = dma_map_single(ar->dev, htt->txbuf.vaddr, size, >> + DMA_TO_DEVICE); >> + if (dma_mapping_error(ar->dev, htt->txbuf.paddr)) >> + goto free_txbuf_vaddr; >> if (!ar->hw_params.continuous_frag_desc) >> - goto skip_frag_desc_alloc; >> + return 0; >> size = htt->max_num_pending_tx * sizeof(struct >> htt_msdu_ext_desc); >> - htt->frag_desc.vaddr = dma_alloc_coherent(ar->dev, size, >> - &htt->frag_desc.paddr, >> - GFP_DMA); >> - if (!htt->frag_desc.vaddr) { >> - ath10k_warn(ar, "failed to alloc fragment desc memory\n"); >> - ret = -ENOMEM; >> + htt->frag_desc.vaddr = kzalloc(size, GFP_KERNEL); >> + if (!htt->frag_desc.vaddr) >> goto free_txbuf; >> - } >> -skip_frag_desc_alloc: >> + htt->frag_desc.paddr = dma_map_single(ar->dev, >> htt->frag_desc.vaddr, >> + size, DMA_TO_DEVICE); >> + if (dma_mapping_error(ar->dev, htt->txbuf.paddr)) >> + goto free_frag_desc; >> + >> return 0; >> +free_frag_desc: >> + kfree(htt->frag_desc.vaddr); >> free_txbuf: >> size = htt->max_num_pending_tx * >> sizeof(struct ath10k_htt_txbuf); >> - dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr, >> - htt->txbuf.paddr); >> + dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size, >> DMA_TO_DEVICE); >> +free_txbuf_vaddr: >> + kfree(htt->txbuf.vaddr); >> free_idr_pending_tx: >> idr_destroy(&htt->pending_tx); >> - return ret; >> + return -ENOMEM; >> } >> static int ath10k_htt_tx_clean_up_pending(int msdu_id, void *skb, >> void *ctx) >> @@ -170,15 +172,17 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt) >> if (htt->txbuf.vaddr) { >> size = htt->max_num_pending_tx * >> sizeof(struct ath10k_htt_txbuf); >> - dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr, >> - htt->txbuf.paddr); >> + dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size, >> + DMA_TO_DEVICE); >> + kfree(htt->txbuf.vaddr); >> } >> if (htt->frag_desc.vaddr) { >> size = htt->max_num_pending_tx * >> sizeof(struct htt_msdu_ext_desc); >> - dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr, >> - htt->frag_desc.paddr); >> + dma_unmap_single(htt->ar->dev, htt->frag_desc.paddr, size, >> + DMA_TO_DEVICE); >> + kfree(htt->frag_desc.vaddr); >> } >> } >> @@ -550,6 +554,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt, >> struct sk_buff *msdu) >> struct htt_msdu_ext_desc *ext_desc = NULL; >> bool limit_mgmt_desc = false; >> bool is_probe_resp = false; >> + int txbuf_offset, frag_offset, frag_size; >> if (unlikely(ieee80211_is_mgmt(hdr->frame_control)) && >> ar->hw_params.max_probe_resp_desc_thres) { >> @@ -574,9 +579,11 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct >> sk_buff *msdu) >> prefetch_len = min(htt->prefetch_len, msdu->len); >> prefetch_len = roundup(prefetch_len, 4); >> + frag_size = sizeof(struct htt_msdu_ext_desc); >> + frag_offset = frag_size * msdu_id; >> + txbuf_offset = sizeof(struct ath10k_htt_txbuf) * msdu_id; >> skb_cb->htt.txbuf = &htt->txbuf.vaddr[msdu_id]; >> - skb_cb->htt.txbuf_paddr = htt->txbuf.paddr + >> - (sizeof(struct ath10k_htt_txbuf) * msdu_id); >> + skb_cb->htt.txbuf_paddr = htt->txbuf.paddr + txbuf_offset; >> if ((ieee80211_is_action(hdr->frame_control) || >> ieee80211_is_deauth(hdr->frame_control) || >> @@ -597,6 +604,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct >> sk_buff *msdu) >> goto err_free_msdu_id; >> } >> + dma_sync_single_range_for_cpu(dev, htt->txbuf.paddr, >> txbuf_offset, >> + sizeof(struct ath10k_htt_txbuf), >> + DMA_TO_DEVICE); >> + >> + if (ar->hw_params.continuous_frag_desc) >> + dma_sync_single_range_for_cpu(dev, htt->frag_desc.paddr, >> + frag_offset, frag_size, >> + DMA_TO_DEVICE); >> + >> switch (skb_cb->txmode) { >> case ATH10K_HW_TXRX_RAW: >> case ATH10K_HW_TXRX_NATIVE_WIFI: >> @@ -723,6 +739,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct >> sk_buff *msdu) >> sg_items[1].paddr = skb_cb->paddr; >> sg_items[1].len = prefetch_len; >> + if (ar->hw_params.continuous_frag_desc) >> + dma_sync_single_range_for_device(dev, htt->frag_desc.paddr, >> + frag_offset, frag_size, >> + DMA_TO_DEVICE); >> + >> + dma_sync_single_range_for_device(dev, htt->txbuf.paddr, >> txbuf_offset, >> + sizeof(struct ath10k_htt_txbuf), >> + DMA_TO_DEVICE); >> + >> res = ath10k_hif_tx_sg(htt->ar, >> htt->ar->htc.endpoint[htt->eid].ul_pipe_id, >> sg_items, ARRAY_SIZE(sg_items)); > > > _______________________________________________ > ath10k mailing list > ath10k@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/ath10k > _______________________________________________ ath10k mailing list ath10k@lists.infradead.org http://lists.infradead.org/mailman/listinfo/ath10k