ATH11K Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC 1/2] ath11k: drop tx_status_fifo
@ 2019-06-28 10:11 John Crispin
  2019-06-28 10:11 ` [RFC 2/2] ath11k: drop memset in tx status hotpath John Crispin
  2019-06-28 11:07 ` [RFC 1/2] ath11k: drop tx_status_fifo Vasanthakumar Thiagarajan
  0 siblings, 2 replies; 4+ messages in thread
From: John Crispin @ 2019-06-28 10:11 UTC (permalink / raw)
  To: Kalle Valo; +Cc: John Crispin, ath11k, Rajkumar Manoharan

perf top showed that 10% of the cpu time was spent on the memcpy used to
populate the fifo when the device is under high load. There is no apparent
benefit from using a fifo so lets just drop it.

Signed-off-by: John Crispin <john@phrozen.org>
---
 drivers/net/wireless/ath/ath11k/dp.c     | 13 ---------
 drivers/net/wireless/ath/ath11k/dp.h     |  5 ----
 drivers/net/wireless/ath/ath11k/dp_tx.c  | 37 +++++-------------------
 drivers/net/wireless/ath/ath11k/hal_tx.c |  5 ----
 drivers/net/wireless/ath/ath11k/hal_tx.h |  1 -
 5 files changed, 8 insertions(+), 53 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index 3f5174a0c622..7e0a67d9e5c1 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -839,10 +839,6 @@ void ath11k_dp_free(struct ath11k_base *sc)
 			     ath11k_dp_tx_pending_cleanup, sc);
 		idr_destroy(&dp->tx_ring[i].txbuf_idr);
 		spin_unlock_bh(&dp->tx_ring[i].tx_idr_lock);
-
-		spin_lock_bh(&dp->tx_ring[i].tx_status_lock);
-		kfifo_free(&dp->tx_ring[i].tx_status_fifo);
-		spin_unlock_bh(&dp->tx_ring[i].tx_status_lock);
 	}
 
 	/* Deinit any SOC level resource */
@@ -888,12 +884,6 @@ int ath11k_dp_alloc(struct ath11k_base *sc)
 		idr_init(&dp->tx_ring[i].txbuf_idr);
 		spin_lock_init(&dp->tx_ring[i].tx_idr_lock);
 		dp->tx_ring[i].tcl_data_ring_id = i;
-
-		spin_lock_init(&dp->tx_ring[i].tx_status_lock);
-		ret = kfifo_alloc(&dp->tx_ring[i].tx_status_fifo, size,
-				  GFP_KERNEL);
-		if (ret)
-			goto fail_cmn_srng_cleanup;
 	}
 
 	for (i = 0; i < HAL_DSCP_TID_MAP_TBL_NUM_ENTRIES_MAX; i++)
@@ -903,9 +893,6 @@ int ath11k_dp_alloc(struct ath11k_base *sc)
 
 	return 0;
 
-fail_cmn_srng_cleanup:
-	ath11k_dp_srng_common_cleanup(sc);
-
 fail_link_desc_cleanup:
 	ath11k_dp_link_desc_cleanup(sc, dp->link_desc_banks,
 				    HAL_WBM_IDLE_LINK, &dp->wbm_idle_ring);
diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index e09065690481..b654277cb68d 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -66,11 +66,6 @@ struct dp_tx_ring {
 	u32 num_tx_pending;
 	/* Protects txbuf_idr and num_pending */
 	spinlock_t tx_idr_lock;
-	DECLARE_KFIFO_PTR(tx_status_fifo, struct hal_wbm_release_ring);
-	/* lock to protect tx_status_fifo because tx_status_fifo can be
-	 * accessed concurrently.
-	 */
-	spinlock_t tx_status_lock;
 };
 
 struct ath11k_pdev_mon_stats {
diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index e7229f6289c1..b3d5995fc657 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -448,47 +448,28 @@ void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
 	int hal_ring_id = dp->tx_ring[ring_id].tcl_comp_ring.ring_id;
 	struct hal_srng *status_ring = &ab->hal.srng_list[hal_ring_id];
 	struct sk_buff *msdu;
-	struct hal_wbm_release_ring tx_status;
 	struct hal_tx_status ts;
 	struct dp_tx_ring *tx_ring = &dp->tx_ring[ring_id];
-	u32 *desc;
 	u32 msdu_id;
+	u32 *desc;
 	u8 mac_id;
 
 	spin_lock_bh(&status_ring->lock);
 
 	ath11k_hal_srng_access_begin(ab, status_ring);
 
-	spin_lock_bh(&tx_ring->tx_status_lock);
-	while (!kfifo_is_full(&tx_ring->tx_status_fifo) &&
-	       (desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) {
-		ath11k_hal_tx_status_desc_sync((void *)desc,
-					       (void *)&tx_status);
-		kfifo_put(&tx_ring->tx_status_fifo, tx_status);
-	}
-
-	if ((ath11k_hal_srng_dst_peek(ab, status_ring) != NULL) &&
-	    kfifo_is_full(&tx_ring->tx_status_fifo)) {
-		/* TODO: Process pending tx_status messages when kfifo_is_full() */
-		ath11k_warn(ab, "Unable to process some of the tx_status ring desc because status_fifo is full\n");
-	}
-
-	spin_unlock_bh(&tx_ring->tx_status_lock);
-
-	ath11k_hal_srng_access_end(ab, status_ring);
-	spin_unlock_bh(&status_ring->lock);
-
-	spin_lock_bh(&tx_ring->tx_status_lock);
-	while (kfifo_get(&tx_ring->tx_status_fifo, &tx_status)) {
+	while ((desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) {
 		memset(&ts, 0, sizeof(ts));
-		ath11k_hal_tx_status_parse(ab, &tx_status, &ts);
+		ath11k_hal_tx_status_parse(ab,
+					   (struct hal_wbm_release_ring *)desc,
+					   &ts);
 
 		mac_id = FIELD_GET(DP_TX_DESC_ID_MAC_ID, ts.desc_id);
 		msdu_id = FIELD_GET(DP_TX_DESC_ID_MSDU_ID, ts.desc_id);
 
 		if (ts.buf_rel_source == HAL_WBM_REL_SRC_MODULE_FW) {
 			ath11k_dp_tx_process_htt_tx_complete(ab,
-							     (void *)&tx_status,
+							     (void *)desc,
 							     mac_id, msdu_id,
 							     tx_ring);
 			continue;
@@ -511,12 +492,10 @@ void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
 		if (atomic_dec_and_test(&ar->dp.num_tx_pending))
 			wake_up(&ar->dp.tx_empty_waitq);
 
-		/* TODO: Locking optimization so that tx_completion for an msdu
-		 * is not called with tx_status_lock acquired
-		 */
 		ath11k_dp_tx_complete_msdu(ar, msdu, &ts);
 	}
-	spin_unlock_bh(&tx_ring->tx_status_lock);
+	ath11k_hal_srng_access_end(ab, status_ring);
+	spin_unlock_bh(&status_ring->lock);
 }
 
 int ath11k_dp_tx_send_reo_cmd(struct ath11k_base *ab, struct dp_rx_tid *rx_tid,
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.c b/drivers/net/wireless/ath/ath11k/hal_tx.c
index 3c04ebe23c22..899fb9356f2d 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.c
@@ -79,11 +79,6 @@ void ath11k_hal_tx_desc_sync(void *tx_desc_cached, void *hw_desc)
 	       sizeof(struct hal_tcl_data_cmd));
 }
 
-void ath11k_hal_tx_status_desc_sync(void *hw_desc, void *local_desc)
-{
-	memcpy(local_desc, hw_desc, HAL_TX_STATUS_DESC_LEN);
-}
-
 void ath11k_hal_tx_status_parse(struct ath11k_base *ab,
 				struct hal_wbm_release_ring *desc,
 				struct hal_tx_status *ts)
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.h b/drivers/net/wireless/ath/ath11k/hal_tx.h
index 3b12dfda7423..b37d585355a0 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.h
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.h
@@ -70,7 +70,6 @@ void ath11k_hal_tx_desc_sync(void *tx_desc_cached, void *hw_desc);
 void ath11k_hal_tx_status_parse(struct ath11k_base *ab,
 				struct hal_wbm_release_ring *desc,
 				struct hal_tx_status *ts);
-void ath11k_hal_tx_status_desc_sync(void *hw_desc, void *local_desc);
 void ath11k_hal_tx_set_dscp_tid_map(struct ath11k_base *ab, int id);
 int ath11k_hal_reo_cmd_send(struct ath11k_base *ab, struct hal_srng *srng,
 			    enum hal_reo_cmd_type type,
-- 
2.20.1


_______________________________________________
ath11k mailing list
ath11k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath11k

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFC 2/2] ath11k: drop memset in tx status hotpath
  2019-06-28 10:11 [RFC 1/2] ath11k: drop tx_status_fifo John Crispin
@ 2019-06-28 10:11 ` John Crispin
  2019-06-28 11:21   ` Kalle Valo
  2019-06-28 11:07 ` [RFC 1/2] ath11k: drop tx_status_fifo Vasanthakumar Thiagarajan
  1 sibling, 1 reply; 4+ messages in thread
From: John Crispin @ 2019-06-28 10:11 UTC (permalink / raw)
  To: Kalle Valo; +Cc: John Crispin, ath11k, Rajkumar Manoharan

This in a small but measurable performance increase.

Signed-off-by: John Crispin <john@phrozen.org>
---
 drivers/net/wireless/ath/ath11k/dp_tx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index b3d5995fc657..8c5db3c88a87 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -448,7 +448,6 @@ void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
 	int hal_ring_id = dp->tx_ring[ring_id].tcl_comp_ring.ring_id;
 	struct hal_srng *status_ring = &ab->hal.srng_list[hal_ring_id];
 	struct sk_buff *msdu;
-	struct hal_tx_status ts;
 	struct dp_tx_ring *tx_ring = &dp->tx_ring[ring_id];
 	u32 msdu_id;
 	u32 *desc;
@@ -459,7 +458,8 @@ void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
 	ath11k_hal_srng_access_begin(ab, status_ring);
 
 	while ((desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) {
-		memset(&ts, 0, sizeof(ts));
+		struct hal_tx_status ts = { 0 };
+
 		ath11k_hal_tx_status_parse(ab,
 					   (struct hal_wbm_release_ring *)desc,
 					   &ts);
-- 
2.20.1


_______________________________________________
ath11k mailing list
ath11k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath11k

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC 1/2] ath11k: drop tx_status_fifo
  2019-06-28 10:11 [RFC 1/2] ath11k: drop tx_status_fifo John Crispin
  2019-06-28 10:11 ` [RFC 2/2] ath11k: drop memset in tx status hotpath John Crispin
@ 2019-06-28 11:07 ` Vasanthakumar Thiagarajan
  1 sibling, 0 replies; 4+ messages in thread
From: Vasanthakumar Thiagarajan @ 2019-06-28 11:07 UTC (permalink / raw)
  To: John Crispin; +Cc: ath11k, Kalle Valo, Rajkumar Manoharan

On 2019-06-28 15:41, John Crispin wrote:
> perf top showed that 10% of the cpu time was spent on the memcpy used 
> to
> populate the fifo when the device is under high load. There is no 
> apparent
> benefit from using a fifo so lets just drop it.
> 
> Signed-off-by: John Crispin <john@phrozen.org>
> ---
>  drivers/net/wireless/ath/ath11k/dp.c     | 13 ---------
>  drivers/net/wireless/ath/ath11k/dp.h     |  5 ----
>  drivers/net/wireless/ath/ath11k/dp_tx.c  | 37 +++++-------------------
>  drivers/net/wireless/ath/ath11k/hal_tx.c |  5 ----
>  drivers/net/wireless/ath/ath11k/hal_tx.h |  1 -
>  5 files changed, 8 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/net/wireless/ath/ath11k/dp.c
> b/drivers/net/wireless/ath/ath11k/dp.c
> index 3f5174a0c622..7e0a67d9e5c1 100644
> --- a/drivers/net/wireless/ath/ath11k/dp.c
> +++ b/drivers/net/wireless/ath/ath11k/dp.c
> @@ -839,10 +839,6 @@ void ath11k_dp_free(struct ath11k_base *sc)
>  			     ath11k_dp_tx_pending_cleanup, sc);
>  		idr_destroy(&dp->tx_ring[i].txbuf_idr);
>  		spin_unlock_bh(&dp->tx_ring[i].tx_idr_lock);
> -
> -		spin_lock_bh(&dp->tx_ring[i].tx_status_lock);
> -		kfifo_free(&dp->tx_ring[i].tx_status_fifo);
> -		spin_unlock_bh(&dp->tx_ring[i].tx_status_lock);
>  	}
> 
>  	/* Deinit any SOC level resource */
> @@ -888,12 +884,6 @@ int ath11k_dp_alloc(struct ath11k_base *sc)
>  		idr_init(&dp->tx_ring[i].txbuf_idr);
>  		spin_lock_init(&dp->tx_ring[i].tx_idr_lock);
>  		dp->tx_ring[i].tcl_data_ring_id = i;
> -
> -		spin_lock_init(&dp->tx_ring[i].tx_status_lock);
> -		ret = kfifo_alloc(&dp->tx_ring[i].tx_status_fifo, size,
> -				  GFP_KERNEL);
> -		if (ret)
> -			goto fail_cmn_srng_cleanup;
>  	}
> 
>  	for (i = 0; i < HAL_DSCP_TID_MAP_TBL_NUM_ENTRIES_MAX; i++)
> @@ -903,9 +893,6 @@ int ath11k_dp_alloc(struct ath11k_base *sc)
> 
>  	return 0;
> 
> -fail_cmn_srng_cleanup:
> -	ath11k_dp_srng_common_cleanup(sc);
> -
>  fail_link_desc_cleanup:
>  	ath11k_dp_link_desc_cleanup(sc, dp->link_desc_banks,
>  				    HAL_WBM_IDLE_LINK, &dp->wbm_idle_ring);
> diff --git a/drivers/net/wireless/ath/ath11k/dp.h
> b/drivers/net/wireless/ath/ath11k/dp.h
> index e09065690481..b654277cb68d 100644
> --- a/drivers/net/wireless/ath/ath11k/dp.h
> +++ b/drivers/net/wireless/ath/ath11k/dp.h
> @@ -66,11 +66,6 @@ struct dp_tx_ring {
>  	u32 num_tx_pending;
>  	/* Protects txbuf_idr and num_pending */
>  	spinlock_t tx_idr_lock;
> -	DECLARE_KFIFO_PTR(tx_status_fifo, struct hal_wbm_release_ring);
> -	/* lock to protect tx_status_fifo because tx_status_fifo can be
> -	 * accessed concurrently.
> -	 */
> -	spinlock_t tx_status_lock;
>  };
> 
>  struct ath11k_pdev_mon_stats {
> diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c
> b/drivers/net/wireless/ath/ath11k/dp_tx.c
> index e7229f6289c1..b3d5995fc657 100644
> --- a/drivers/net/wireless/ath/ath11k/dp_tx.c
> +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
> @@ -448,47 +448,28 @@ void ath11k_dp_tx_completion_handler(struct
> ath11k_base *ab, int ring_id)
>  	int hal_ring_id = dp->tx_ring[ring_id].tcl_comp_ring.ring_id;
>  	struct hal_srng *status_ring = &ab->hal.srng_list[hal_ring_id];
>  	struct sk_buff *msdu;
> -	struct hal_wbm_release_ring tx_status;
>  	struct hal_tx_status ts;
>  	struct dp_tx_ring *tx_ring = &dp->tx_ring[ring_id];
> -	u32 *desc;
>  	u32 msdu_id;
> +	u32 *desc;
>  	u8 mac_id;
> 
>  	spin_lock_bh(&status_ring->lock);
> 
>  	ath11k_hal_srng_access_begin(ab, status_ring);
> 
> -	spin_lock_bh(&tx_ring->tx_status_lock);
> -	while (!kfifo_is_full(&tx_ring->tx_status_fifo) &&
> -	       (desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) 
> {
> -		ath11k_hal_tx_status_desc_sync((void *)desc,
> -					       (void *)&tx_status);
> -		kfifo_put(&tx_ring->tx_status_fifo, tx_status);
> -	}
> -
> -	if ((ath11k_hal_srng_dst_peek(ab, status_ring) != NULL) &&
> -	    kfifo_is_full(&tx_ring->tx_status_fifo)) {
> -		/* TODO: Process pending tx_status messages when kfifo_is_full() */
> -		ath11k_warn(ab, "Unable to process some of the tx_status ring desc
> because status_fifo is full\n");
> -	}
> -
> -	spin_unlock_bh(&tx_ring->tx_status_lock);
> -
> -	ath11k_hal_srng_access_end(ab, status_ring);
> -	spin_unlock_bh(&status_ring->lock);
> -
> -	spin_lock_bh(&tx_ring->tx_status_lock);
> -	while (kfifo_get(&tx_ring->tx_status_fifo, &tx_status)) {
> +	while ((desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) 
> {
>  		memset(&ts, 0, sizeof(ts));
> -		ath11k_hal_tx_status_parse(ab, &tx_status, &ts);
> +		ath11k_hal_tx_status_parse(ab,
> +					   (struct hal_wbm_release_ring *)desc,
> +					   &ts);
> 
>  		mac_id = FIELD_GET(DP_TX_DESC_ID_MAC_ID, ts.desc_id);
>  		msdu_id = FIELD_GET(DP_TX_DESC_ID_MSDU_ID, ts.desc_id);
> 
>  		if (ts.buf_rel_source == HAL_WBM_REL_SRC_MODULE_FW) {
>  			ath11k_dp_tx_process_htt_tx_complete(ab,
> -							     (void *)&tx_status,
> +							     (void *)desc,
>  							     mac_id, msdu_id,
>  							     tx_ring);

Delay in reaping tx completion desc from hw ring could trigger 
backpressure on tx completion ring which could result
in target assert. Extensive testing with this change is required to make 
sure this change does not cause any issue.

Vasanth

_______________________________________________
ath11k mailing list
ath11k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath11k

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC 2/2] ath11k: drop memset in tx status hotpath
  2019-06-28 10:11 ` [RFC 2/2] ath11k: drop memset in tx status hotpath John Crispin
@ 2019-06-28 11:21   ` Kalle Valo
  0 siblings, 0 replies; 4+ messages in thread
From: Kalle Valo @ 2019-06-28 11:21 UTC (permalink / raw)
  To: John Crispin; +Cc: ath11k, Rajkumar Manoharan

John Crispin <john@phrozen.org> writes:

> This in a small but measurable performance increase.
>
> Signed-off-by: John Crispin <john@phrozen.org>
> ---
>  drivers/net/wireless/ath/ath11k/dp_tx.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
> index b3d5995fc657..8c5db3c88a87 100644
> --- a/drivers/net/wireless/ath/ath11k/dp_tx.c
> +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
> @@ -448,7 +448,6 @@ void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
>  	int hal_ring_id = dp->tx_ring[ring_id].tcl_comp_ring.ring_id;
>  	struct hal_srng *status_ring = &ab->hal.srng_list[hal_ring_id];
>  	struct sk_buff *msdu;
> -	struct hal_tx_status ts;
>  	struct dp_tx_ring *tx_ring = &dp->tx_ring[ring_id];
>  	u32 msdu_id;
>  	u32 *desc;
> @@ -459,7 +458,8 @@ void ath11k_dp_tx_completion_handler(struct ath11k_base *ab, int ring_id)
>  	ath11k_hal_srng_access_begin(ab, status_ring);
>  
>  	while ((desc = ath11k_hal_srng_dst_get_next_entry(ab, status_ring))) {
> -		memset(&ts, 0, sizeof(ts));
> +		struct hal_tx_status ts = { 0 };

Very interesting, so a compiler zeroes memory from stack faster than
memset() from "heap" (or whatever kmalloced memory should be called)?
Just out of curiosity, any ideas why memset() is so slow?

Also some kind of numbers (before and after) would be nice to have in
the commit log. It's not uncommon that years after we investigate why a
certain change was made.

-- 
Kalle Valo

_______________________________________________
ath11k mailing list
ath11k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath11k

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-06-28 11:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-06-28 10:11 [RFC 1/2] ath11k: drop tx_status_fifo John Crispin
2019-06-28 10:11 ` [RFC 2/2] ath11k: drop memset in tx status hotpath John Crispin
2019-06-28 11:21   ` Kalle Valo
2019-06-28 11:07 ` [RFC 1/2] ath11k: drop tx_status_fifo Vasanthakumar Thiagarajan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox