From mboxrd@z Thu Jan 1 00:00:00 1970 Return-path: Received: from smtp.codeaurora.org ([198.145.29.96]) by bombadil.infradead.org with esmtps (Exim 4.80.1 #2 (Red Hat Linux)) id 1a0vpu-0004vm-TI for ath10k@lists.infradead.org; Mon, 23 Nov 2015 18:27:35 +0000 Received: from [10.234.232.17] (qf-scl1nat.qualcomm.com [207.114.132.30]) (using TLSv1.2 with cipher DHE-RSA-AES128-SHA (128/128 bits)) (No client certificate requested) (Authenticated sender: poh@smtp.codeaurora.org) by smtp.codeaurora.org (Postfix) with ESMTPSA id A87741412E0 for ; Mon, 23 Nov 2015 18:27:12 +0000 (UTC) Message-ID: <56535A23.3020804@codeaurora.org> Date: Mon, 23 Nov 2015 10:25:39 -0800 From: Peter Oh MIME-Version: 1.0 Subject: Re: [PATCH 2/2] ath10k: do not use coherent memory for tx buffers References: <1448284729-98078-1-git-send-email-nbd@openwrt.org> <1448284729-98078-2-git-send-email-nbd@openwrt.org> <56534C01.8030407@codeaurora.org> <5653549B.8090503@dd-wrt.com> In-Reply-To: <5653549B.8090503@dd-wrt.com> List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="us-ascii"; Format="flowed" Sender: "ath10k" Errors-To: ath10k-bounces+kvalo=adurom.com@lists.infradead.org To: ath10k@lists.infradead.org On 11/23/2015 10:02 AM, Sebastian Gottschall wrote: > Am 23.11.2015 um 18:25 schrieb Peter Oh: >> Hi, >> >> Have you measured the peak throughput? >> The pre-allocated coherent memory concept was introduced as once of >> peak throughput improvement. >> IIRC, dma_map_single takes about 4 us on Cortex A7 and >> dma_unmap_single also takes time to invalid cache. >> Please share your tput number before and after, so I don't need to >> worry about performance degrade. > yes. and this concept fucks up the qualcom ipq806x platform (which has > by default 2 QCA99XX cards). it does not work, since the preallocated > concept allocates too much memory Can you specify the number that you're seeing? The tx buffer descriptor uses 56KB, so it would require 64KB x 2 slab for 2 cards which is 128KB. Are you saying your system does not work because of this 128KB usage? > which is not available > in dma space on that platform. > > thanks > Sebastian >> >> Thanks, >> Peter >> >> On 11/23/2015 05:18 AM, Felix Fietkau wrote: >>> Coherent memory is expensive to access, since all memory accesses >>> bypass >>> the cache. It is also completely unnecessary for this case. >>> Convert to mapped memory instead and use the DMA API to flush the cache >>> where necessary. >>> Fixes allocation failures on embedded devices. >>> >>> Signed-off-by: Felix Fietkau >>> --- >>> drivers/net/wireless/ath/ath10k/htt_tx.c | 77 >>> +++++++++++++++++++++----------- >>> 1 file changed, 51 insertions(+), 26 deletions(-) >>> >>> diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c >>> b/drivers/net/wireless/ath/ath10k/htt_tx.c >>> index 8f76b9d..99d9793 100644 >>> --- a/drivers/net/wireless/ath/ath10k/htt_tx.c >>> +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c >>> @@ -100,7 +100,7 @@ void ath10k_htt_tx_free_msdu_id(struct >>> ath10k_htt *htt, u16 msdu_id) >>> int ath10k_htt_tx_alloc(struct ath10k_htt *htt) >>> { >>> struct ath10k *ar = htt->ar; >>> - int ret, size; >>> + int size; >>> ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt tx max num pending tx >>> %d\n", >>> htt->max_num_pending_tx); >>> @@ -109,39 +109,41 @@ int ath10k_htt_tx_alloc(struct ath10k_htt *htt) >>> idr_init(&htt->pending_tx); >>> size = htt->max_num_pending_tx * sizeof(struct >>> ath10k_htt_txbuf); >>> - htt->txbuf.vaddr = dma_alloc_coherent(ar->dev, size, >>> - &htt->txbuf.paddr, >>> - GFP_DMA); >>> - if (!htt->txbuf.vaddr) { >>> - ath10k_err(ar, "failed to alloc tx buffer\n"); >>> - ret = -ENOMEM; >>> + htt->txbuf.vaddr = kzalloc(size, GFP_KERNEL); >>> + if (!htt->txbuf.vaddr) >>> goto free_idr_pending_tx; >>> - } >>> + >>> + htt->txbuf.paddr = dma_map_single(ar->dev, htt->txbuf.vaddr, size, >>> + DMA_TO_DEVICE); >>> + if (dma_mapping_error(ar->dev, htt->txbuf.paddr)) >>> + goto free_txbuf_vaddr; >>> if (!ar->hw_params.continuous_frag_desc) >>> - goto skip_frag_desc_alloc; >>> + return 0; >>> size = htt->max_num_pending_tx * sizeof(struct >>> htt_msdu_ext_desc); >>> - htt->frag_desc.vaddr = dma_alloc_coherent(ar->dev, size, >>> - &htt->frag_desc.paddr, >>> - GFP_DMA); >>> - if (!htt->frag_desc.vaddr) { >>> - ath10k_warn(ar, "failed to alloc fragment desc memory\n"); >>> - ret = -ENOMEM; >>> + htt->frag_desc.vaddr = kzalloc(size, GFP_KERNEL); >>> + if (!htt->frag_desc.vaddr) >>> goto free_txbuf; >>> - } >>> -skip_frag_desc_alloc: >>> + htt->frag_desc.paddr = dma_map_single(ar->dev, >>> htt->frag_desc.vaddr, >>> + size, DMA_TO_DEVICE); >>> + if (dma_mapping_error(ar->dev, htt->txbuf.paddr)) >>> + goto free_frag_desc; >>> + >>> return 0; >>> +free_frag_desc: >>> + kfree(htt->frag_desc.vaddr); >>> free_txbuf: >>> size = htt->max_num_pending_tx * >>> sizeof(struct ath10k_htt_txbuf); >>> - dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr, >>> - htt->txbuf.paddr); >>> + dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size, >>> DMA_TO_DEVICE); >>> +free_txbuf_vaddr: >>> + kfree(htt->txbuf.vaddr); >>> free_idr_pending_tx: >>> idr_destroy(&htt->pending_tx); >>> - return ret; >>> + return -ENOMEM; >>> } >>> static int ath10k_htt_tx_clean_up_pending(int msdu_id, void >>> *skb, void *ctx) >>> @@ -170,15 +172,17 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt) >>> if (htt->txbuf.vaddr) { >>> size = htt->max_num_pending_tx * >>> sizeof(struct ath10k_htt_txbuf); >>> - dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr, >>> - htt->txbuf.paddr); >>> + dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size, >>> + DMA_TO_DEVICE); >>> + kfree(htt->txbuf.vaddr); >>> } >>> if (htt->frag_desc.vaddr) { >>> size = htt->max_num_pending_tx * >>> sizeof(struct htt_msdu_ext_desc); >>> - dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr, >>> - htt->frag_desc.paddr); >>> + dma_unmap_single(htt->ar->dev, htt->frag_desc.paddr, size, >>> + DMA_TO_DEVICE); >>> + kfree(htt->frag_desc.vaddr); >>> } >>> } >>> @@ -550,6 +554,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt, >>> struct sk_buff *msdu) >>> struct htt_msdu_ext_desc *ext_desc = NULL; >>> bool limit_mgmt_desc = false; >>> bool is_probe_resp = false; >>> + int txbuf_offset, frag_offset, frag_size; >>> if (unlikely(ieee80211_is_mgmt(hdr->frame_control)) && >>> ar->hw_params.max_probe_resp_desc_thres) { >>> @@ -574,9 +579,11 @@ int ath10k_htt_tx(struct ath10k_htt *htt, >>> struct sk_buff *msdu) >>> prefetch_len = min(htt->prefetch_len, msdu->len); >>> prefetch_len = roundup(prefetch_len, 4); >>> + frag_size = sizeof(struct htt_msdu_ext_desc); >>> + frag_offset = frag_size * msdu_id; >>> + txbuf_offset = sizeof(struct ath10k_htt_txbuf) * msdu_id; >>> skb_cb->htt.txbuf = &htt->txbuf.vaddr[msdu_id]; >>> - skb_cb->htt.txbuf_paddr = htt->txbuf.paddr + >>> - (sizeof(struct ath10k_htt_txbuf) * msdu_id); >>> + skb_cb->htt.txbuf_paddr = htt->txbuf.paddr + txbuf_offset; >>> if ((ieee80211_is_action(hdr->frame_control) || >>> ieee80211_is_deauth(hdr->frame_control) || >>> @@ -597,6 +604,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, >>> struct sk_buff *msdu) >>> goto err_free_msdu_id; >>> } >>> + dma_sync_single_range_for_cpu(dev, htt->txbuf.paddr, >>> txbuf_offset, >>> + sizeof(struct ath10k_htt_txbuf), >>> + DMA_TO_DEVICE); >>> + >>> + if (ar->hw_params.continuous_frag_desc) >>> + dma_sync_single_range_for_cpu(dev, htt->frag_desc.paddr, >>> + frag_offset, frag_size, >>> + DMA_TO_DEVICE); >>> + >>> switch (skb_cb->txmode) { >>> case ATH10K_HW_TXRX_RAW: >>> case ATH10K_HW_TXRX_NATIVE_WIFI: >>> @@ -723,6 +739,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, >>> struct sk_buff *msdu) >>> sg_items[1].paddr = skb_cb->paddr; >>> sg_items[1].len = prefetch_len; >>> + if (ar->hw_params.continuous_frag_desc) >>> + dma_sync_single_range_for_device(dev, htt->frag_desc.paddr, >>> + frag_offset, frag_size, >>> + DMA_TO_DEVICE); >>> + >>> + dma_sync_single_range_for_device(dev, htt->txbuf.paddr, >>> txbuf_offset, >>> + sizeof(struct ath10k_htt_txbuf), >>> + DMA_TO_DEVICE); >>> + >>> res = ath10k_hif_tx_sg(htt->ar, >>> htt->ar->htc.endpoint[htt->eid].ul_pipe_id, >>> sg_items, ARRAY_SIZE(sg_items)); >> >> >> _______________________________________________ >> ath10k mailing list >> ath10k@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/ath10k >> > > > _______________________________________________ > ath10k mailing list > ath10k@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/ath10k _______________________________________________ ath10k mailing list ath10k@lists.infradead.org http://lists.infradead.org/mailman/listinfo/ath10k