* [PATCH 2/3] station: transition to ft-roaming on NETDEV_EVENT_ASSOCIATING
2025-08-15 15:26 [PATCH 1/3] netdev: check connected in channel switch event James Prestwood
@ 2025-08-15 15:26 ` James Prestwood
2025-08-15 15:26 ` [PATCH 3/3] netdev: handle local CMD_ASSOCIATE failures (for FT) James Prestwood
1 sibling, 0 replies; 3+ messages in thread
From: James Prestwood @ 2025-08-15 15:26 UTC (permalink / raw)
To: iwd; +Cc: James Prestwood
This is the station portion of a 2 part patch (netdev being the
other piece).
After CSA IE parsing was added to the kernel this opened up the
possibility that associations could be rejected locally based on
the contents of this CSA IE in the AP's beacons. Overall, it was
always possible for a local rejection but this case was never
considered by IWD. The CSA-based rejection is something that can
and does happen out in the wild.
When this association rejection happens it desyncs IWD's and the
kernel's state:
1. IWD begins an FT roam. Authenticates successfully, then proceeds
to calling netdev_ft_reassociate().
2. Immediately IWD transitions to a ft-roaming state and waits for
an association response.
3. CMD_ASSOCIATE is rejected by the kernel in the ACK which IWD
handles by sending a deauthenticate command to the kernel (since
we have a valid authentication to the new BSS).
4. Due to a bug IWD uses the target BSSID to deauthenticate which
the kernel rejects since it has no knowledge of this auth. This
error is not handled or logged.
5. IWD proceeds, assuming it's deauthenticated, and transitions to a
disconnected state. The kernel remains "connected" which of course
prevents any future connections.
This patch addresses IWD's recovery behavior when the kernel rejects
a CMD_ASSOCIATE (for any reason, not just CSA-rejection):
- Now IWD will not change state until netdev signals that
CMD_ASSOCIATE was accepted (in subsequent patch). This signal will
come via the NETDEV_EVENT_ASSOCIATING event. If this event arrives,
and IWD is still in a "preparing_roam" state, we can proceed to
ft-roaming.
- If station_reassociate_cb() is called with a failure result but
IWD is still in a "preparing_roam" state the connection to the
current AP is assumed to be maintained and IWD can proceed to
trying more BSS's. Otherwise this indicates a failed roam.
Notes:
- The station_ft_work_ready() callback did need to be reworked to
keep the target roam_bss in the list when FT-Association is
started. This required modifying some of the other paths to pop
and free the roam_bss rather than doing that by default.
---
src/station.c | 61 +++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 49 insertions(+), 12 deletions(-)
diff --git a/src/station.c b/src/station.c
index d4bfe0e6..cc6cbe8b 100644
--- a/src/station.c
+++ b/src/station.c
@@ -2477,6 +2477,8 @@ delayed_retry:
station_roam_retry(station);
}
+static void station_transition_start(struct station *station);
+
static void station_reassociate_cb(struct netdev *netdev,
enum netdev_result result,
void *event_data,
@@ -2487,13 +2489,25 @@ static void station_reassociate_cb(struct netdev *netdev,
l_debug("%u, result: %d", netdev_get_ifindex(station->netdev), result);
if (station->state != STATION_STATE_ROAMING &&
- station->state != STATION_STATE_FT_ROAMING)
+ station->state != STATION_STATE_FT_ROAMING &&
+ !station->preparing_roam)
return;
if (result == NETDEV_RESULT_OK)
station_roamed(station);
- else
- station_roam_failed(station);
+ else {
+ /*
+ * If we are still in a preparing_roam state this means that
+ * the CMD_ASSOCIATE was rejected in the ACK. This rejection is
+ * recoverable since the kernel should not have changed any
+ * internal state. We can pop the current and try another BSS.
+ */
+ if (station->preparing_roam) {
+ l_free(l_queue_pop_head(station->roam_bss_list));
+ station_transition_start(station);
+ } else
+ station_roam_failed(station);
+ }
}
static void station_netdev_event(struct netdev *netdev, enum netdev_event event,
@@ -2606,13 +2620,10 @@ static void station_preauthenticate_cb(struct netdev *netdev,
station->hs = handshake_state_ref(new_hs);
}
-static void station_transition_start(struct station *station);
-
static bool station_ft_work_ready(struct wiphy_radio_work_item *item)
{
struct station *station = l_container_of(item, struct station, ft_work);
- _auto_(l_free) struct roam_bss *rbss = l_queue_pop_head(
- station->roam_bss_list);
+ struct roam_bss *rbss = l_queue_peek_head(station->roam_bss_list);
struct scan_bss *bss;
int ret;
@@ -2637,6 +2648,11 @@ static bool station_ft_work_ready(struct wiphy_radio_work_item *item)
l_debug("Re-inserting BSS "MAC" using reassociation, rank: %u",
MAC_STR(rbss->addr), rbss->rank);
+ /*
+ * Pop off the roam bss, then re-insert as there isn't a
+ * guarantee that it will end up back at the head
+ */
+ l_queue_pop_head(station->roam_bss_list);
l_queue_insert(station->roam_bss_list, rbss,
roam_bss_rank_compare, NULL);
@@ -2645,13 +2661,14 @@ static bool station_ft_work_ready(struct wiphy_radio_work_item *item)
MMPDU_STATUS_CODE_INVALID_PMKID);
station_transition_start(station);
- l_steal_ptr(rbss);
break;
case -ENOENT:
station_debug_event(station, "ft-roam-failed");
iwd_notice(IWD_NOTICE_FT_ROAM_FAILED,
"status: authentication timeout");
try_next:
+ l_queue_pop_head(station->roam_bss_list);
+ l_free(rbss);
station_transition_start(station);
break;
case 0:
@@ -2662,10 +2679,6 @@ try_next:
if (ret < 0)
goto disassociate;
- station->connected_bss = bss;
- station->preparing_roam = false;
- station_enter_state(station, STATION_STATE_FT_ROAMING);
-
break;
case -EINVAL:
/*
@@ -3901,6 +3914,8 @@ static void station_netdev_event(struct netdev *netdev, enum netdev_event event,
void *event_data, void *user_data)
{
struct station *station = user_data;
+ _auto_(l_free) struct roam_bss *rbss = NULL;
+ struct scan_bss *bss;
switch (event) {
case NETDEV_EVENT_AUTHENTICATING:
@@ -3908,6 +3923,28 @@ static void station_netdev_event(struct netdev *netdev, enum netdev_event event,
break;
case NETDEV_EVENT_ASSOCIATING:
station_debug_event(station, "associating");
+
+ if (!station->preparing_roam)
+ break;
+
+ /* Both !rbss and !bss should NEVER happen */
+ rbss = l_queue_pop_head(station->roam_bss_list);
+ if (L_WARN_ON(!rbss)) {
+ station_roam_failed(station);
+ return;
+ }
+
+ bss = network_bss_find_by_addr(station->connected_network,
+ rbss->addr);
+ if (L_WARN_ON(!bss)) {
+ station_roam_failed(station);
+ return;
+ }
+
+ station->connected_bss = bss;
+ station->preparing_roam = false;
+ station_enter_state(station, STATION_STATE_FT_ROAMING);
+
break;
case NETDEV_EVENT_DISCONNECT_BY_AP:
case NETDEV_EVENT_DISCONNECT_BY_SME:
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH 3/3] netdev: handle local CMD_ASSOCIATE failures (for FT)
2025-08-15 15:26 [PATCH 1/3] netdev: check connected in channel switch event James Prestwood
2025-08-15 15:26 ` [PATCH 2/3] station: transition to ft-roaming on NETDEV_EVENT_ASSOCIATING James Prestwood
@ 2025-08-15 15:26 ` James Prestwood
1 sibling, 0 replies; 3+ messages in thread
From: James Prestwood @ 2025-08-15 15:26 UTC (permalink / raw)
To: iwd; +Cc: James Prestwood
The issue surrounding this was described in detail in the previous
patch. The netdev piece is simple and does two things:
1. Don't deauthenticate and fail the connection if the CMD_ASSOCIATE
ACK comes back with a failure. A local rejection like this should
not change the kernel's internal state, so we can recover. In this
case call the connect callback directly with a failure which
station will handle.
2. Upon a successful CMD_ASSOCIATE ACK, signal
NETDEV_EVENT_ASSOCIATING to let station know. This did require
modifying the netdev_associate_event() to not duplicate event
calls, specifically when FT is being used.
---
src/netdev.c | 32 +++++++++++++++++++++++++++-----
1 file changed, 27 insertions(+), 5 deletions(-)
diff --git a/src/netdev.c b/src/netdev.c
index ca8bfea0..e45423be 100644
--- a/src/netdev.c
+++ b/src/netdev.c
@@ -2993,15 +2993,32 @@ static void netdev_cmd_ft_reassociate_cb(struct l_genl_msg *msg,
void *user_data)
{
struct netdev *netdev = user_data;
+ int err = l_genl_msg_get_error(msg);
netdev->connect_cmd_id = 0;
- if (l_genl_msg_get_error(msg) >= 0)
+ /*
+ * If CMD_ASSOCIATE was accepted we're committed to association and
+ * can no longer go back. Signal this to station so the state can
+ * transition to ft-roaming.
+ */
+ if (err >= 0) {
+ if (netdev->event_filter)
+ netdev->event_filter(netdev, NETDEV_EVENT_ASSOCIATING,
+ NULL, netdev->user_data);
return;
+ }
- netdev_deauth_and_fail_connection(netdev,
- NETDEV_RESULT_ASSOCIATION_FAILED,
- MMPDU_STATUS_CODE_UNSPECIFIED);
+ l_debug("failed FT reassocaition (%d)", err);
+
+ /*
+ * A failed ACK should not have changed the kernel's state. This means
+ * we should still be connected to the current AP and can proceed to
+ * trying more BSS's.
+ */
+
+ netdev->connect_cb(netdev, NETDEV_RESULT_ASSOCIATION_FAILED, NULL,
+ netdev->user_data);
}
static bool kernel_will_retry_auth(uint16_t status_code,
@@ -3185,7 +3202,12 @@ static void netdev_associate_event(struct l_genl_msg *msg,
if (!netdev->connected || netdev->aborting)
return;
- if (netdev->event_filter)
+ /*
+ * For FT this event is sent in the CMD_ASSOCIATE ack to indicate
+ * association was successfully started in the kernel, don't duplicate
+ * and send here too.
+ */
+ if (!netdev->in_ft && netdev->event_filter)
netdev->event_filter(netdev, NETDEV_EVENT_ASSOCIATING,
NULL, netdev->user_data);
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread