All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jouni Malinen <jouni@codeaurora.org>
To: Kalle Valo <kvalo@codeaurora.org>
Cc: ath11k@lists.infradead.org, linux-wireless@vger.kernel.org,
	P Praneesh <ppranees@codeaurora.org>,
	Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>,
	Sriram R <srirrama@codeaurora.org>,
	Jouni Malinen <jouni@codeaurora.org>
Subject: [PATCH 02/12] ath11k: allocate dst ring descriptors from cacheable memory
Date: Wed, 16 Jun 2021 00:13:57 +0300	[thread overview]
Message-ID: <20210615211407.92233-3-jouni@codeaurora.org> (raw)
In-Reply-To: <20210615211407.92233-1-jouni@codeaurora.org>

From: P Praneesh <ppranees@codeaurora.org>

tcl_data and reo_dst rings are currently being allocated
using dma_allocate_coherent() which is non cachable.

Allocating ring memory from cacheable memory area
allows cached descriptor access and prefetch next
descriptors to optimize CPU usage during
descriptor processing on NAPI.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01695-QCAHKSWPL_SILICONZ-1

Co-developed-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Co-developed-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: P Praneesh <ppranees@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
---
 drivers/net/wireless/ath/ath11k/dp.c  | 34 +++++++++++++++++++++++----
 drivers/net/wireless/ath/ath11k/dp.h  |  1 +
 drivers/net/wireless/ath/ath11k/hal.c | 25 ++++++++++++++++++--
 drivers/net/wireless/ath/ath11k/hal.h |  1 +
 4 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index b0c8f6290099..cf869ebc209a 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -101,8 +101,11 @@ void ath11k_dp_srng_cleanup(struct ath11k_base *ab, struct dp_srng *ring)
 	if (!ring->vaddr_unaligned)
 		return;
 
-	dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
-			  ring->paddr_unaligned);
+	if (ring->cached)
+		kfree(ring->vaddr_unaligned);
+	else
+		dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
+				  ring->paddr_unaligned);
 
 	ring->vaddr_unaligned = NULL;
 }
@@ -222,6 +225,7 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 	int entry_sz = ath11k_hal_srng_get_entrysize(ab, type);
 	int max_entries = ath11k_hal_srng_get_max_entries(ab, type);
 	int ret;
+	bool cached;
 
 	if (max_entries < 0 || entry_sz < 0)
 		return -EINVAL;
@@ -229,10 +233,25 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 	if (num_entries > max_entries)
 		num_entries = max_entries;
 
+	/* Allocate the reo dst and tx completion rings from cacheable memory */
+	switch (type) {
+	case HAL_REO_DST:
+		cached = true;
+	default:
+		cached = false;
+	}
+
 	ring->size = (num_entries * entry_sz) + HAL_RING_BASE_ALIGN - 1;
-	ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
-						   &ring->paddr_unaligned,
-						   GFP_KERNEL);
+
+	if (cached) {
+		ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
+		ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);
+	} else {
+		ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
+							   &ring->paddr_unaligned,
+							   GFP_KERNEL);
+	}
+
 	if (!ring->vaddr_unaligned)
 		return -ENOMEM;
 
@@ -292,6 +311,11 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 		return -EINVAL;
 	}
 
+	if (cached) {
+		params.flags |= HAL_SRNG_FLAGS_CACHED;
+		ring->cached = 1;
+	}
+
 	ret = ath11k_hal_srng_setup(ab, type, ring_num, mac_id, &params);
 	if (ret < 0) {
 		ath11k_warn(ab, "failed to setup srng: %d ring_id %d\n",
diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index ee768ccce46e..e6591488a28c 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -64,6 +64,7 @@ struct dp_srng {
 	dma_addr_t paddr;
 	int size;
 	u32 ring_id;
+	u8 cached;
 };
 
 struct dp_rxdma_ring {
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index eaa0edca5576..a58e86e42b5b 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -627,6 +627,21 @@ u32 *ath11k_hal_srng_dst_peek(struct ath11k_base *ab, struct hal_srng *srng)
 	return NULL;
 }
 
+static void ath11k_hal_srng_prefetch_desc(struct ath11k_base *ab,
+					  struct hal_srng *srng)
+{
+	u32 *desc;
+
+	/* prefetch only if desc is available */
+	desc = ath11k_hal_srng_dst_peek(ab, srng);
+	if (likely(desc)) {
+		dma_sync_single_for_cpu(ab->dev, virt_to_phys(desc),
+					(srng->entry_size * sizeof(u32)),
+					DMA_FROM_DEVICE);
+		prefetch(desc);
+	}
+}
+
 u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
 					struct hal_srng *srng)
 {
@@ -642,6 +657,10 @@ u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
 	srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
 			      srng->ring_size;
 
+	/* Try to prefetch the next descriptor in the ring */
+	if (srng->flags & HAL_SRNG_FLAGS_CACHED)
+		ath11k_hal_srng_prefetch_desc(ab, srng);
+
 	return desc;
 }
 
@@ -775,11 +794,13 @@ void ath11k_hal_srng_access_begin(struct ath11k_base *ab, struct hal_srng *srng)
 {
 	lockdep_assert_held(&srng->lock);
 
-	if (srng->ring_dir == HAL_SRNG_DIR_SRC)
+	if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
 		srng->u.src_ring.cached_tp =
 			*(volatile u32 *)srng->u.src_ring.tp_addr;
-	else
+	} else {
 		srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr;
+		ath11k_hal_srng_prefetch_desc(ab, srng);
+	}
 }
 
 /* Update cached ring head/tail pointers to HW. ath11k_hal_srng_access_begin()
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 35ed3a14e200..0f4f9ce74354 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -513,6 +513,7 @@ enum hal_srng_dir {
 #define HAL_SRNG_FLAGS_DATA_TLV_SWAP		0x00000020
 #define HAL_SRNG_FLAGS_LOW_THRESH_INTR_EN	0x00010000
 #define HAL_SRNG_FLAGS_MSI_INTR			0x00020000
+#define HAL_SRNG_FLAGS_CACHED                   0x20000000
 #define HAL_SRNG_FLAGS_LMAC_RING		0x80000000
 
 #define HAL_SRNG_TLV_HDR_TAG		GENMASK(9, 1)
-- 
2.25.1


-- 
ath11k mailing list
ath11k@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/ath11k

WARNING: multiple messages have this Message-ID (diff)
From: Jouni Malinen <jouni@codeaurora.org>
To: Kalle Valo <kvalo@codeaurora.org>
Cc: ath11k@lists.infradead.org, linux-wireless@vger.kernel.org,
	P Praneesh <ppranees@codeaurora.org>,
	Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>,
	Sriram R <srirrama@codeaurora.org>,
	Jouni Malinen <jouni@codeaurora.org>
Subject: [PATCH 02/12] ath11k: allocate dst ring descriptors from cacheable memory
Date: Wed, 16 Jun 2021 00:13:57 +0300	[thread overview]
Message-ID: <20210615211407.92233-3-jouni@codeaurora.org> (raw)
In-Reply-To: <20210615211407.92233-1-jouni@codeaurora.org>

From: P Praneesh <ppranees@codeaurora.org>

tcl_data and reo_dst rings are currently being allocated
using dma_allocate_coherent() which is non cachable.

Allocating ring memory from cacheable memory area
allows cached descriptor access and prefetch next
descriptors to optimize CPU usage during
descriptor processing on NAPI.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01695-QCAHKSWPL_SILICONZ-1

Co-developed-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Co-developed-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: P Praneesh <ppranees@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
---
 drivers/net/wireless/ath/ath11k/dp.c  | 34 +++++++++++++++++++++++----
 drivers/net/wireless/ath/ath11k/dp.h  |  1 +
 drivers/net/wireless/ath/ath11k/hal.c | 25 ++++++++++++++++++--
 drivers/net/wireless/ath/ath11k/hal.h |  1 +
 4 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index b0c8f6290099..cf869ebc209a 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -101,8 +101,11 @@ void ath11k_dp_srng_cleanup(struct ath11k_base *ab, struct dp_srng *ring)
 	if (!ring->vaddr_unaligned)
 		return;
 
-	dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
-			  ring->paddr_unaligned);
+	if (ring->cached)
+		kfree(ring->vaddr_unaligned);
+	else
+		dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
+				  ring->paddr_unaligned);
 
 	ring->vaddr_unaligned = NULL;
 }
@@ -222,6 +225,7 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 	int entry_sz = ath11k_hal_srng_get_entrysize(ab, type);
 	int max_entries = ath11k_hal_srng_get_max_entries(ab, type);
 	int ret;
+	bool cached;
 
 	if (max_entries < 0 || entry_sz < 0)
 		return -EINVAL;
@@ -229,10 +233,25 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 	if (num_entries > max_entries)
 		num_entries = max_entries;
 
+	/* Allocate the reo dst and tx completion rings from cacheable memory */
+	switch (type) {
+	case HAL_REO_DST:
+		cached = true;
+	default:
+		cached = false;
+	}
+
 	ring->size = (num_entries * entry_sz) + HAL_RING_BASE_ALIGN - 1;
-	ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
-						   &ring->paddr_unaligned,
-						   GFP_KERNEL);
+
+	if (cached) {
+		ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
+		ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);
+	} else {
+		ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
+							   &ring->paddr_unaligned,
+							   GFP_KERNEL);
+	}
+
 	if (!ring->vaddr_unaligned)
 		return -ENOMEM;
 
@@ -292,6 +311,11 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
 		return -EINVAL;
 	}
 
+	if (cached) {
+		params.flags |= HAL_SRNG_FLAGS_CACHED;
+		ring->cached = 1;
+	}
+
 	ret = ath11k_hal_srng_setup(ab, type, ring_num, mac_id, &params);
 	if (ret < 0) {
 		ath11k_warn(ab, "failed to setup srng: %d ring_id %d\n",
diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index ee768ccce46e..e6591488a28c 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -64,6 +64,7 @@ struct dp_srng {
 	dma_addr_t paddr;
 	int size;
 	u32 ring_id;
+	u8 cached;
 };
 
 struct dp_rxdma_ring {
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index eaa0edca5576..a58e86e42b5b 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -627,6 +627,21 @@ u32 *ath11k_hal_srng_dst_peek(struct ath11k_base *ab, struct hal_srng *srng)
 	return NULL;
 }
 
+static void ath11k_hal_srng_prefetch_desc(struct ath11k_base *ab,
+					  struct hal_srng *srng)
+{
+	u32 *desc;
+
+	/* prefetch only if desc is available */
+	desc = ath11k_hal_srng_dst_peek(ab, srng);
+	if (likely(desc)) {
+		dma_sync_single_for_cpu(ab->dev, virt_to_phys(desc),
+					(srng->entry_size * sizeof(u32)),
+					DMA_FROM_DEVICE);
+		prefetch(desc);
+	}
+}
+
 u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
 					struct hal_srng *srng)
 {
@@ -642,6 +657,10 @@ u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
 	srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
 			      srng->ring_size;
 
+	/* Try to prefetch the next descriptor in the ring */
+	if (srng->flags & HAL_SRNG_FLAGS_CACHED)
+		ath11k_hal_srng_prefetch_desc(ab, srng);
+
 	return desc;
 }
 
@@ -775,11 +794,13 @@ void ath11k_hal_srng_access_begin(struct ath11k_base *ab, struct hal_srng *srng)
 {
 	lockdep_assert_held(&srng->lock);
 
-	if (srng->ring_dir == HAL_SRNG_DIR_SRC)
+	if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
 		srng->u.src_ring.cached_tp =
 			*(volatile u32 *)srng->u.src_ring.tp_addr;
-	else
+	} else {
 		srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr;
+		ath11k_hal_srng_prefetch_desc(ab, srng);
+	}
 }
 
 /* Update cached ring head/tail pointers to HW. ath11k_hal_srng_access_begin()
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 35ed3a14e200..0f4f9ce74354 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -513,6 +513,7 @@ enum hal_srng_dir {
 #define HAL_SRNG_FLAGS_DATA_TLV_SWAP		0x00000020
 #define HAL_SRNG_FLAGS_LOW_THRESH_INTR_EN	0x00010000
 #define HAL_SRNG_FLAGS_MSI_INTR			0x00020000
+#define HAL_SRNG_FLAGS_CACHED                   0x20000000
 #define HAL_SRNG_FLAGS_LMAC_RING		0x80000000
 
 #define HAL_SRNG_TLV_HDR_TAG		GENMASK(9, 1)
-- 
2.25.1


  parent reply	other threads:[~2021-06-15 22:44 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-15 21:13 [PATCH 00/12] ath11k: optimizations in data path Jouni Malinen
2021-06-15 21:13 ` Jouni Malinen
2021-06-15 21:13 ` [PATCH 01/12] ath11k: disable unused CE8 interrupts for ipq8074 Jouni Malinen
2021-06-15 21:13   ` Jouni Malinen
2021-06-15 21:13 ` Jouni Malinen [this message]
2021-06-15 21:13   ` [PATCH 02/12] ath11k: allocate dst ring descriptors from cacheable memory Jouni Malinen
2021-06-28 15:19   ` Jeff Johnson
2021-06-28 15:19     ` Jeff Johnson
2021-06-15 21:13 ` [PATCH 03/12] ath11k: modify dp_rx desc access wrapper calls inline Jouni Malinen
2021-06-15 21:13   ` Jouni Malinen
2021-06-15 21:13 ` [PATCH 04/12] ath11k: avoid additional access to ath11k_hal_srng_dst_num_free Jouni Malinen
2021-06-15 21:13   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 05/12] ath11k: avoid active pdev check for each msdu Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 06/12] ath11k: remove usage quota while processing rx packets Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 07/12] ath11k: add branch predictors in process_rx Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 08/12] ath11k: allocate HAL_WBM2SW_RELEASE ring from cacheable memory Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 09/12] ath11k: remove mod operator in dst ring processing Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 10/12] ath11k: avoid while loop in ring selection of tx completion interrupt Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 11/12] ath11k: add branch predictors in dp_tx path Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-15 21:14 ` [PATCH 12/12] ath11k: avoid unnecessary lock contention in tx_completion path Jouni Malinen
2021-06-15 21:14   ` Jouni Malinen
2021-06-29 17:35   ` Felix Fietkau
2021-06-29 17:35     ` Felix Fietkau
2021-06-30  5:17     ` P Praneesh
2021-06-30  5:17       ` P Praneesh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210615211407.92233-3-jouni@codeaurora.org \
    --to=jouni@codeaurora.org \
    --cc=ath11k@lists.infradead.org \
    --cc=kvalo@codeaurora.org \
    --cc=linux-wireless@vger.kernel.org \
    --cc=ppranees@codeaurora.org \
    --cc=pradeepc@codeaurora.org \
    --cc=srirrama@codeaurora.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.