public inbox for linux-wireless@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ath11k: workaround firmware bug where peer_id=0
@ 2026-03-26 10:53 Matthew Leach
  2026-03-30  7:57 ` Matthew Leach
  0 siblings, 1 reply; 2+ messages in thread
From: Matthew Leach @ 2026-03-26 10:53 UTC (permalink / raw)
  To: Jeff Johnson; +Cc: linux-wireless, ath11k, linux-kernel, kernel, Matthew Leach

It has been observed that under certain conditions the ath11k firmware
sets the peer_id=0 for RX'd frames. For standard MPDUs this is fine as
ath11k_dp_rx_h_find_peer() has a fallback case where it locates the peer
based upon the source mac address.

However, on an aggregated link, reception of an A-MSDU results in the
peer not being resolved for the second (and any subsequent) sub-MSDUs.
This causes the encryption type of the frame to be set to an incorrect
value, resulting in the sub-MSDUs being dropped by ieee80211. Notice how
the flags differ in:

ath11k_pci 0000:03:00.0: data rx skb 000000002f4b704d len 1534 peer xx:xx:xx:xx:xx:xx 0 ucast sn 3063 he160 rate_idx 9 vht_nss 2 freq 5240 band 1 flag 0x40d1a fcs-err 0 mic-err 0 amsdu-more 0 peer_id 0 first_msdu 1 last_msdu 0
ath11k_pci 0000:03:00.0: data rx skb 0000000038acd580 len 1534 peer (null) 0 ucast sn 3063 he160 rate_idx 9 vht_nss 2 freq 5240 band 1 flag 0x40d00 fcs-err 0 mic-err 0 amsdu-more 0 peer_id 0 first_msdu 0 last_msdu 1

This patch caches the peer enctype during the MSDU processing loop,
caching it on the first AMSDU sub-frame (is_first_msdu=1 is_last_msdu=0)
and setting the correct enctype for any subsequent sub-MSDUs.

Signed-off-by: Matthew Leach <matthew.leach@collabora.com>
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 35 ++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 49d959b2e148..f5c2a8085a1b 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -21,6 +21,12 @@
 
 #define ATH11K_DP_RX_FRAGMENT_TIMEOUT_MS (2 * HZ)
 
+struct cached_peer_info {
+	enum hal_encrypt_type enctype;
+	u16 seq_no;
+	bool valid;
+};
+
 static inline
 u8 *ath11k_dp_rx_h_80211_hdr(struct ath11k_base *ab, struct hal_rx_desc *desc)
 {
@@ -2232,7 +2238,8 @@ ath11k_dp_rx_h_find_peer(struct ath11k_base *ab, struct sk_buff *msdu)
 static void ath11k_dp_rx_h_mpdu(struct ath11k *ar,
 				struct sk_buff *msdu,
 				struct hal_rx_desc *rx_desc,
-				struct ieee80211_rx_status *rx_status)
+				struct ieee80211_rx_status *rx_status,
+				struct cached_peer_info *peer_cache)
 {
 	bool  fill_crypto_hdr;
 	enum hal_encrypt_type enctype;
@@ -2265,6 +2272,21 @@ static void ath11k_dp_rx_h_mpdu(struct ath11k *ar,
 	}
 	spin_unlock_bh(&ar->ab->base_lock);
 
+	if (!rxcb->peer_id && rxcb->is_first_msdu && !rxcb->is_last_msdu) {
+		peer_cache->enctype = enctype;
+		peer_cache->seq_no = rxcb->seq_no;
+		peer_cache->valid = true;
+	}
+
+	if (!rxcb->peer_id && !rxcb->is_first_msdu && peer_cache->valid) {
+		if (rxcb->seq_no == peer_cache->seq_no)
+			enctype = peer_cache->enctype;
+		else
+			ath11k_dbg(ar->ab, ATH11K_DBG_DATA,
+				   "null peer_id workaround failed. cached seq_no=%d, msdu seq_no=%d",
+				   peer_cache->seq_no, rxcb->seq_no);
+	}
+
 	rx_attention = ath11k_dp_rx_get_attention(ar->ab, rx_desc);
 	err_bitmap = ath11k_dp_rx_h_attn_mpdu_err(rx_attention);
 	if (enctype != HAL_ENCRYPT_TYPE_OPEN && !err_bitmap)
@@ -2506,7 +2528,8 @@ static void ath11k_dp_rx_deliver_msdu(struct ath11k *ar, struct napi_struct *nap
 static int ath11k_dp_rx_process_msdu(struct ath11k *ar,
 				     struct sk_buff *msdu,
 				     struct sk_buff_head *msdu_list,
-				     struct ieee80211_rx_status *rx_status)
+				     struct ieee80211_rx_status *rx_status,
+				     struct cached_peer_info *peer_cache)
 {
 	struct ath11k_base *ab = ar->ab;
 	struct hal_rx_desc *rx_desc, *lrx_desc;
@@ -2574,7 +2597,7 @@ static int ath11k_dp_rx_process_msdu(struct ath11k *ar,
 	}
 
 	ath11k_dp_rx_h_ppdu(ar, rx_desc, rx_status);
-	ath11k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_status);
+	ath11k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_status, peer_cache);
 
 	rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED;
 
@@ -2592,6 +2615,7 @@ static void ath11k_dp_rx_process_received_packets(struct ath11k_base *ab,
 	struct sk_buff *msdu;
 	struct ath11k *ar;
 	struct ieee80211_rx_status rx_status = {};
+	struct cached_peer_info peer_cache = {};
 	int ret;
 
 	if (skb_queue_empty(msdu_list))
@@ -2609,7 +2633,7 @@ static void ath11k_dp_rx_process_received_packets(struct ath11k_base *ab,
 	}
 
 	while ((msdu = __skb_dequeue(msdu_list))) {
-		ret = ath11k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_status);
+		ret = ath11k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_status, &peer_cache);
 		if (unlikely(ret)) {
 			ath11k_dbg(ab, ATH11K_DBG_DATA,
 				   "Unable to process msdu %d", ret);
@@ -3959,6 +3983,7 @@ static int ath11k_dp_rx_h_null_q_desc(struct ath11k *ar, struct sk_buff *msdu,
 	u8 l3pad_bytes;
 	struct ath11k_skb_rxcb *rxcb = ATH11K_SKB_RXCB(msdu);
 	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
+	struct cached_peer_info peer_cache = {};
 
 	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(ar->ab, desc);
 
@@ -4002,7 +4027,7 @@ static int ath11k_dp_rx_h_null_q_desc(struct ath11k *ar, struct sk_buff *msdu,
 	}
 	ath11k_dp_rx_h_ppdu(ar, desc, status);
 
-	ath11k_dp_rx_h_mpdu(ar, msdu, desc, status);
+	ath11k_dp_rx_h_mpdu(ar, msdu, desc, status, &peer_cache);
 
 	rxcb->tid = ath11k_dp_rx_h_mpdu_start_tid(ar->ab, desc);
 

---
base-commit: f338e77383789c0cae23ca3d48adcc5e9e137e3c
change-id: 20260326-ath11k-null-peerid-workaround-625a129781b1

Best regards,
--  
Matt


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] ath11k: workaround firmware bug where peer_id=0
  2026-03-26 10:53 [PATCH] ath11k: workaround firmware bug where peer_id=0 Matthew Leach
@ 2026-03-30  7:57 ` Matthew Leach
  0 siblings, 0 replies; 2+ messages in thread
From: Matthew Leach @ 2026-03-30  7:57 UTC (permalink / raw)
  To: Jeff Johnson; +Cc: linux-wireless, ath11k, linux-kernel, kernel

Hello,

Matthew Leach <matthew.leach@collabora.com> writes:

> This patch caches the peer enctype during the MSDU processing loop,
> caching it on the first AMSDU sub-frame (is_first_msdu=1
> is_last_msdu=0) and setting the correct enctype for any subsequent
> sub-MSDUs.

I've been looking at creating a patch that addresses the root cause,
rather than patching the flags of incoming frames:

--8<---------------cut here---------------start------------->8---
diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c
index 6d0126c39301..98348ccfdfbe 100644
--- a/drivers/net/wireless/ath/ath11k/peer.c
+++ b/drivers/net/wireless/ath/ath11k/peer.c
@@ -347,7 +347,7 @@ static int __ath11k_peer_delete(struct ath11k *ar, u32 vdev_id, const u8 *addr)
 	return 0;
 }
 
-int ath11k_peer_delete(struct ath11k *ar, u32 vdev_id, u8 *addr)
+int ath11k_peer_delete(struct ath11k *ar, u32 vdev_id, const u8 *addr)
 {
 	int ret;
 
@@ -372,7 +372,7 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif,
 {
 	struct ath11k_peer *peer;
 	struct ath11k_sta *arsta;
-	int ret, fbret;
+	int ret, fbret, retries = 3;
 
 	lockdep_assert_held(&ar->conf_mutex);
 
@@ -400,6 +400,8 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif,
 	spin_unlock_bh(&ar->ab->base_lock);
 	mutex_unlock(&ar->ab->tbl_mtx_lock);
 
+retry:
+
 	ret = ath11k_wmi_send_peer_create_cmd(ar, param);
 	if (ret) {
 		ath11k_warn(ar->ab,
@@ -427,6 +429,18 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif,
 		goto cleanup;
 	}
 
+	if (!peer->peer_id) {
+		if (retries--) {
+			spin_unlock_bh(&ar->ab->base_lock);
+			mutex_unlock(&ar->ab->tbl_mtx_lock);
+			ath11k_peer_delete(ar, param->vdev_id, param->peer_addr);
+			goto retry;
+		} else {
+			ath11k_warn(ar->ab, "Null peer workaround failed for peer %pM, adding anyway",
+				    param->peer_addr);
+		}
+	}
+
 	ret = ath11k_peer_rhash_add(ar->ab, peer);
 	if (ret) {
 		spin_unlock_bh(&ar->ab->base_lock);
diff --git a/drivers/net/wireless/ath/ath11k/peer.h b/drivers/net/wireless/ath/ath11k/peer.h
index 3ad2f3355b14..6325c4d157c7 100644
--- a/drivers/net/wireless/ath/ath11k/peer.h
+++ b/drivers/net/wireless/ath/ath11k/peer.h
@@ -47,7 +47,7 @@ struct ath11k_peer *ath11k_peer_find_by_addr(struct ath11k_base *ab,
 					     const u8 *addr);
 struct ath11k_peer *ath11k_peer_find_by_id(struct ath11k_base *ab, int peer_id);
 void ath11k_peer_cleanup(struct ath11k *ar, u32 vdev_id);
-int ath11k_peer_delete(struct ath11k *ar, u32 vdev_id, u8 *addr);
+int ath11k_peer_delete(struct ath11k *ar, u32 vdev_id, const u8 *addr);
 int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif,
 		       struct ieee80211_sta *sta, struct peer_create_params *param);
 int ath11k_wait_for_peer_delete_done(struct ath11k *ar, u32 vdev_id,
--8<---------------cut here---------------end--------------->8---

This patch detects the error condition at the point where a peer map
request reply is received from the firmware. If the firmware maps with
peer_id=0, we request that the firmware unmap that peer and map again,
hoping it selects a peer_id!=0. We attempt this up to three times, at
which point we give up and let the peer be mapped with an ID of 0.

This patch addresses the root cause, but I think it's more invasive. I'd
appreciate some comments as to which approach upstream would prefer. If
the preference is for the above, I'll send out a v2.

Regards,
-- 
Matt

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-03-30  7:58 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-26 10:53 [PATCH] ath11k: workaround firmware bug where peer_id=0 Matthew Leach
2026-03-30  7:57 ` Matthew Leach

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox