linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
@ 2025-07-15 20:57 carlos.bilbao
  2025-07-15 20:59 ` Carlos Bilbao
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: carlos.bilbao @ 2025-07-15 20:57 UTC (permalink / raw)
  To: jv, andrew+netdev, davem, edumazet, kuba, pabeni, horms, netdev,
	linux-kernel
  Cc: sforshee, bilbao, Carlos Bilbao

From: Carlos Bilbao <carlos.bilbao@kernel.org>

Replace the bonding periodic state machine for LACPDU transmission of
function ad_periodic_machine() with a jiffies-based mechanism, which is
more accurate and can help reduce drift under contention.

Signed-off-by: Carlos Bilbao (DigitalOcean) <carlos.bilbao@kernel.org>
---
 drivers/net/bonding/bond_3ad.c | 79 +++++++++++++---------------------
 include/net/bond_3ad.h         |  2 +-
 2 files changed, 32 insertions(+), 49 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c6807e473ab7..8654a51266a3 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1421,44 +1421,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
 	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
 	    !bond_params->lacp_active) {
 		port->sm_periodic_state = AD_NO_PERIODIC;
-	}
-	/* check if state machine should change state */
-	else if (port->sm_periodic_timer_counter) {
-		/* check if periodic state machine expired */
-		if (!(--port->sm_periodic_timer_counter)) {
-			/* if expired then do tx */
-			port->sm_periodic_state = AD_PERIODIC_TX;
-		} else {
-			/* If not expired, check if there is some new timeout
-			 * parameter from the partner state
-			 */
-			switch (port->sm_periodic_state) {
-			case AD_FAST_PERIODIC:
-				if (!(port->partner_oper.port_state
-				      & LACP_STATE_LACP_TIMEOUT))
-					port->sm_periodic_state = AD_SLOW_PERIODIC;
-				break;
-			case AD_SLOW_PERIODIC:
-				if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) {
-					port->sm_periodic_timer_counter = 0;
-					port->sm_periodic_state = AD_PERIODIC_TX;
-				}
-				break;
-			default:
-				break;
-			}
-		}
+	} else if (port->sm_periodic_state == AD_NO_PERIODIC)
+		port->sm_periodic_state = AD_FAST_PERIODIC;
+	/* check if periodic state machine expired */
+	else if (time_after_eq(jiffies, port->sm_periodic_next_jiffies)) {
+		/* if expired then do tx */
+		port->sm_periodic_state = AD_PERIODIC_TX;
 	} else {
+		/* If not expired, check if there is some new timeout
+		 * parameter from the partner state
+		 */
 		switch (port->sm_periodic_state) {
-		case AD_NO_PERIODIC:
-			port->sm_periodic_state = AD_FAST_PERIODIC;
-			break;
-		case AD_PERIODIC_TX:
-			if (!(port->partner_oper.port_state &
-			    LACP_STATE_LACP_TIMEOUT))
+		case AD_FAST_PERIODIC:
+			if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
 				port->sm_periodic_state = AD_SLOW_PERIODIC;
-			else
-				port->sm_periodic_state = AD_FAST_PERIODIC;
+			break;
+		case AD_SLOW_PERIODIC:
+			if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
+				port->sm_periodic_state = AD_PERIODIC_TX;
 			break;
 		default:
 			break;
@@ -1471,21 +1451,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
 			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
 			  port->actor_port_number, last_state,
 			  port->sm_periodic_state);
+
 		switch (port->sm_periodic_state) {
-		case AD_NO_PERIODIC:
-			port->sm_periodic_timer_counter = 0;
-			break;
-		case AD_FAST_PERIODIC:
-			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
-			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
-			break;
-		case AD_SLOW_PERIODIC:
-			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
-			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
-			break;
 		case AD_PERIODIC_TX:
 			port->ntt = true;
-			break;
+			if (!(port->partner_oper.port_state &
+						LACP_STATE_LACP_TIMEOUT))
+				port->sm_periodic_state = AD_SLOW_PERIODIC;
+			else
+				port->sm_periodic_state = AD_FAST_PERIODIC;
+		fallthrough;
+		case AD_SLOW_PERIODIC:
+		case AD_FAST_PERIODIC:
+			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
+				port->sm_periodic_next_jiffies = jiffies
+					+ HZ * AD_SLOW_PERIODIC_TIME;
+			else /* AD_FAST_PERIODIC */
+				port->sm_periodic_next_jiffies = jiffies
+					+ HZ * AD_FAST_PERIODIC_TIME;
 		default:
 			break;
 		}
@@ -1987,7 +1970,7 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
 		port->sm_rx_state = 0;
 		port->sm_rx_timer_counter = 0;
 		port->sm_periodic_state = 0;
-		port->sm_periodic_timer_counter = 0;
+		port->sm_periodic_next_jiffies = 0;
 		port->sm_mux_state = 0;
 		port->sm_mux_timer_counter = 0;
 		port->sm_tx_state = 0;
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 2053cd8e788a..aabb8c97caf4 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -227,7 +227,7 @@ typedef struct port {
 	rx_states_t sm_rx_state;	/* state machine rx state */
 	u16 sm_rx_timer_counter;	/* state machine rx timer counter */
 	periodic_states_t sm_periodic_state;	/* state machine periodic state */
-	u16 sm_periodic_timer_counter;	/* state machine periodic timer counter */
+	unsigned long sm_periodic_next_jiffies;	/* state machine periodic next expected sent */
 	mux_states_t sm_mux_state;	/* state machine mux state */
 	u16 sm_mux_timer_counter;	/* state machine mux timer counter */
 	tx_states_t sm_tx_state;	/* state machine tx state */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-15 20:57 [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies carlos.bilbao
@ 2025-07-15 20:59 ` Carlos Bilbao
  2025-07-16 15:30   ` Jay Vosburgh
  2025-08-05 13:14   ` Hangbin Liu
  2025-07-16  9:09 ` Simon Horman
  2025-07-17  4:35 ` kernel test robot
  2 siblings, 2 replies; 9+ messages in thread
From: Carlos Bilbao @ 2025-07-15 20:59 UTC (permalink / raw)
  To: carlos.bilbao, jv, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms, netdev, linux-kernel
  Cc: sforshee

FYI, I was able to test this locally but couldn’t find any kselftests to
stress the bonding state machine. If anyone knows of additional ways to
test it, I’d be happy to run them.

Thanks!

Carlos

On 7/15/25 15:57, carlos.bilbao@kernel.org wrote:
> From: Carlos Bilbao <carlos.bilbao@kernel.org>
>
> Replace the bonding periodic state machine for LACPDU transmission of
> function ad_periodic_machine() with a jiffies-based mechanism, which is
> more accurate and can help reduce drift under contention.
>
> Signed-off-by: Carlos Bilbao (DigitalOcean) <carlos.bilbao@kernel.org>
> ---
>   drivers/net/bonding/bond_3ad.c | 79 +++++++++++++---------------------
>   include/net/bond_3ad.h         |  2 +-
>   2 files changed, 32 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
> index c6807e473ab7..8654a51266a3 100644
> --- a/drivers/net/bonding/bond_3ad.c
> +++ b/drivers/net/bonding/bond_3ad.c
> @@ -1421,44 +1421,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>   	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
>   	    !bond_params->lacp_active) {
>   		port->sm_periodic_state = AD_NO_PERIODIC;
> -	}
> -	/* check if state machine should change state */
> -	else if (port->sm_periodic_timer_counter) {
> -		/* check if periodic state machine expired */
> -		if (!(--port->sm_periodic_timer_counter)) {
> -			/* if expired then do tx */
> -			port->sm_periodic_state = AD_PERIODIC_TX;
> -		} else {
> -			/* If not expired, check if there is some new timeout
> -			 * parameter from the partner state
> -			 */
> -			switch (port->sm_periodic_state) {
> -			case AD_FAST_PERIODIC:
> -				if (!(port->partner_oper.port_state
> -				      & LACP_STATE_LACP_TIMEOUT))
> -					port->sm_periodic_state = AD_SLOW_PERIODIC;
> -				break;
> -			case AD_SLOW_PERIODIC:
> -				if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) {
> -					port->sm_periodic_timer_counter = 0;
> -					port->sm_periodic_state = AD_PERIODIC_TX;
> -				}
> -				break;
> -			default:
> -				break;
> -			}
> -		}
> +	} else if (port->sm_periodic_state == AD_NO_PERIODIC)
> +		port->sm_periodic_state = AD_FAST_PERIODIC;
> +	/* check if periodic state machine expired */
> +	else if (time_after_eq(jiffies, port->sm_periodic_next_jiffies)) {
> +		/* if expired then do tx */
> +		port->sm_periodic_state = AD_PERIODIC_TX;
>   	} else {
> +		/* If not expired, check if there is some new timeout
> +		 * parameter from the partner state
> +		 */
>   		switch (port->sm_periodic_state) {
> -		case AD_NO_PERIODIC:
> -			port->sm_periodic_state = AD_FAST_PERIODIC;
> -			break;
> -		case AD_PERIODIC_TX:
> -			if (!(port->partner_oper.port_state &
> -			    LACP_STATE_LACP_TIMEOUT))
> +		case AD_FAST_PERIODIC:
> +			if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
>   				port->sm_periodic_state = AD_SLOW_PERIODIC;
> -			else
> -				port->sm_periodic_state = AD_FAST_PERIODIC;
> +			break;
> +		case AD_SLOW_PERIODIC:
> +			if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
> +				port->sm_periodic_state = AD_PERIODIC_TX;
>   			break;
>   		default:
>   			break;
> @@ -1471,21 +1451,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>   			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
>   			  port->actor_port_number, last_state,
>   			  port->sm_periodic_state);
> +
>   		switch (port->sm_periodic_state) {
> -		case AD_NO_PERIODIC:
> -			port->sm_periodic_timer_counter = 0;
> -			break;
> -		case AD_FAST_PERIODIC:
> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
> -			break;
> -		case AD_SLOW_PERIODIC:
> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
> -			break;
>   		case AD_PERIODIC_TX:
>   			port->ntt = true;
> -			break;
> +			if (!(port->partner_oper.port_state &
> +						LACP_STATE_LACP_TIMEOUT))
> +				port->sm_periodic_state = AD_SLOW_PERIODIC;
> +			else
> +				port->sm_periodic_state = AD_FAST_PERIODIC;
> +		fallthrough;
> +		case AD_SLOW_PERIODIC:
> +		case AD_FAST_PERIODIC:
> +			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
> +				port->sm_periodic_next_jiffies = jiffies
> +					+ HZ * AD_SLOW_PERIODIC_TIME;
> +			else /* AD_FAST_PERIODIC */
> +				port->sm_periodic_next_jiffies = jiffies
> +					+ HZ * AD_FAST_PERIODIC_TIME;
>   		default:
>   			break;
>   		}
> @@ -1987,7 +1970,7 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
>   		port->sm_rx_state = 0;
>   		port->sm_rx_timer_counter = 0;
>   		port->sm_periodic_state = 0;
> -		port->sm_periodic_timer_counter = 0;
> +		port->sm_periodic_next_jiffies = 0;
>   		port->sm_mux_state = 0;
>   		port->sm_mux_timer_counter = 0;
>   		port->sm_tx_state = 0;
> diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
> index 2053cd8e788a..aabb8c97caf4 100644
> --- a/include/net/bond_3ad.h
> +++ b/include/net/bond_3ad.h
> @@ -227,7 +227,7 @@ typedef struct port {
>   	rx_states_t sm_rx_state;	/* state machine rx state */
>   	u16 sm_rx_timer_counter;	/* state machine rx timer counter */
>   	periodic_states_t sm_periodic_state;	/* state machine periodic state */
> -	u16 sm_periodic_timer_counter;	/* state machine periodic timer counter */
> +	unsigned long sm_periodic_next_jiffies;	/* state machine periodic next expected sent */
>   	mux_states_t sm_mux_state;	/* state machine mux state */
>   	u16 sm_mux_timer_counter;	/* state machine mux timer counter */
>   	tx_states_t sm_tx_state;	/* state machine tx state */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-15 20:57 [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies carlos.bilbao
  2025-07-15 20:59 ` Carlos Bilbao
@ 2025-07-16  9:09 ` Simon Horman
  2025-07-16 19:45   ` Carlos Bilbao
  2025-07-17  4:35 ` kernel test robot
  2 siblings, 1 reply; 9+ messages in thread
From: Simon Horman @ 2025-07-16  9:09 UTC (permalink / raw)
  To: carlos.bilbao
  Cc: jv, andrew+netdev, davem, edumazet, kuba, pabeni, netdev,
	linux-kernel, sforshee, bilbao

On Tue, Jul 15, 2025 at 03:57:33PM -0500, carlos.bilbao@kernel.org wrote:
> From: Carlos Bilbao <carlos.bilbao@kernel.org>
> 
> Replace the bonding periodic state machine for LACPDU transmission of
> function ad_periodic_machine() with a jiffies-based mechanism, which is
> more accurate and can help reduce drift under contention.
> 
> Signed-off-by: Carlos Bilbao (DigitalOcean) <carlos.bilbao@kernel.org>
> ---
>  drivers/net/bonding/bond_3ad.c | 79 +++++++++++++---------------------
>  include/net/bond_3ad.h         |  2 +-
>  2 files changed, 32 insertions(+), 49 deletions(-)
> 

Hi Carlos,

Some minor feedback from my side.

> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c

...

> @@ -1471,21 +1451,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>  			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
>  			  port->actor_port_number, last_state,
>  			  port->sm_periodic_state);
> +
>  		switch (port->sm_periodic_state) {
> -		case AD_NO_PERIODIC:
> -			port->sm_periodic_timer_counter = 0;
> -			break;
> -		case AD_FAST_PERIODIC:
> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
> -			break;
> -		case AD_SLOW_PERIODIC:
> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
> -			break;
>  		case AD_PERIODIC_TX:
>  			port->ntt = true;
> -			break;
> +			if (!(port->partner_oper.port_state &
> +						LACP_STATE_LACP_TIMEOUT))
> +				port->sm_periodic_state = AD_SLOW_PERIODIC;
> +			else
> +				port->sm_periodic_state = AD_FAST_PERIODIC;
> +		fallthrough;

super-nit: maybe one more tab of indentation for the line above.

> +		case AD_SLOW_PERIODIC:
> +		case AD_FAST_PERIODIC:
> +			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
> +				port->sm_periodic_next_jiffies = jiffies
> +					+ HZ * AD_SLOW_PERIODIC_TIME;
> +			else /* AD_FAST_PERIODIC */
> +				port->sm_periodic_next_jiffies = jiffies
> +					+ HZ * AD_FAST_PERIODIC_TIME;

Clang 20.1.8 complains that either a break; or fallthrough; should go here.
For consistency with the code above I'd suggest the latter.


>  		default:
>  			break;
>  		}

...

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-15 20:59 ` Carlos Bilbao
@ 2025-07-16 15:30   ` Jay Vosburgh
  2025-07-16 19:44     ` Carlos Bilbao
  2025-08-05 13:14   ` Hangbin Liu
  1 sibling, 1 reply; 9+ messages in thread
From: Jay Vosburgh @ 2025-07-16 15:30 UTC (permalink / raw)
  To: Carlos Bilbao
  Cc: carlos.bilbao, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms, netdev, linux-kernel, sforshee

Carlos Bilbao <bilbao@vt.edu> wrote:

>FYI, I was able to test this locally but couldn’t find any kselftests to
>stress the bonding state machine. If anyone knows of additional ways to
>test it, I’d be happy to run them.

	Your commit message says this change will "help reduce drift
under contention," but above you say you're unable to stress the state
machine.

	How do you induce "drift under contention" to test that your
patch actually improves something?  What testing has been done to insure
that the new code doesn't change the behavior in other ways (regressions)?

	Without a specific reproducable bug scenario that this change
fixes, I'm leery of applying such a refactor to code that has seemingly
been working fine for 20+ years.

	I gather that what this is intending to do is reduce the current
dependency on the scheduling accuracy of the workqueue event that runs
the state machines.  The current implementation works on a "number of
invocations" basis, assuming that the event is invoked every 100 msec,
and computes various timeouts based on the number of times the state
machine runs.

	-J

>Thanks!
>
>Carlos
>
>On 7/15/25 15:57, carlos.bilbao@kernel.org wrote:
>> From: Carlos Bilbao <carlos.bilbao@kernel.org>
>>
>> Replace the bonding periodic state machine for LACPDU transmission of
>> function ad_periodic_machine() with a jiffies-based mechanism, which is
>> more accurate and can help reduce drift under contention.
>>
>> Signed-off-by: Carlos Bilbao (DigitalOcean) <carlos.bilbao@kernel.org>
>> ---
>>   drivers/net/bonding/bond_3ad.c | 79 +++++++++++++---------------------
>>   include/net/bond_3ad.h         |  2 +-
>>   2 files changed, 32 insertions(+), 49 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>> index c6807e473ab7..8654a51266a3 100644
>> --- a/drivers/net/bonding/bond_3ad.c
>> +++ b/drivers/net/bonding/bond_3ad.c
>> @@ -1421,44 +1421,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>>   	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
>>   	    !bond_params->lacp_active) {
>>   		port->sm_periodic_state = AD_NO_PERIODIC;
>> -	}
>> -	/* check if state machine should change state */
>> -	else if (port->sm_periodic_timer_counter) {
>> -		/* check if periodic state machine expired */
>> -		if (!(--port->sm_periodic_timer_counter)) {
>> -			/* if expired then do tx */
>> -			port->sm_periodic_state = AD_PERIODIC_TX;
>> -		} else {
>> -			/* If not expired, check if there is some new timeout
>> -			 * parameter from the partner state
>> -			 */
>> -			switch (port->sm_periodic_state) {
>> -			case AD_FAST_PERIODIC:
>> -				if (!(port->partner_oper.port_state
>> -				      & LACP_STATE_LACP_TIMEOUT))
>> -					port->sm_periodic_state = AD_SLOW_PERIODIC;
>> -				break;
>> -			case AD_SLOW_PERIODIC:
>> -				if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) {
>> -					port->sm_periodic_timer_counter = 0;
>> -					port->sm_periodic_state = AD_PERIODIC_TX;
>> -				}
>> -				break;
>> -			default:
>> -				break;
>> -			}
>> -		}
>> +	} else if (port->sm_periodic_state == AD_NO_PERIODIC)
>> +		port->sm_periodic_state = AD_FAST_PERIODIC;
>> +	/* check if periodic state machine expired */
>> +	else if (time_after_eq(jiffies, port->sm_periodic_next_jiffies)) {
>> +		/* if expired then do tx */
>> +		port->sm_periodic_state = AD_PERIODIC_TX;
>>   	} else {
>> +		/* If not expired, check if there is some new timeout
>> +		 * parameter from the partner state
>> +		 */
>>   		switch (port->sm_periodic_state) {
>> -		case AD_NO_PERIODIC:
>> -			port->sm_periodic_state = AD_FAST_PERIODIC;
>> -			break;
>> -		case AD_PERIODIC_TX:
>> -			if (!(port->partner_oper.port_state &
>> -			    LACP_STATE_LACP_TIMEOUT))
>> +		case AD_FAST_PERIODIC:
>> +			if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
>>   				port->sm_periodic_state = AD_SLOW_PERIODIC;
>> -			else
>> -				port->sm_periodic_state = AD_FAST_PERIODIC;
>> +			break;
>> +		case AD_SLOW_PERIODIC:
>> +			if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
>> +				port->sm_periodic_state = AD_PERIODIC_TX;
>>   			break;
>>   		default:
>>   			break;
>> @@ -1471,21 +1451,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>>   			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
>>   			  port->actor_port_number, last_state,
>>   			  port->sm_periodic_state);
>> +
>>   		switch (port->sm_periodic_state) {
>> -		case AD_NO_PERIODIC:
>> -			port->sm_periodic_timer_counter = 0;
>> -			break;
>> -		case AD_FAST_PERIODIC:
>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
>> -			break;
>> -		case AD_SLOW_PERIODIC:
>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
>> -			break;
>>   		case AD_PERIODIC_TX:
>>   			port->ntt = true;
>> -			break;
>> +			if (!(port->partner_oper.port_state &
>> +						LACP_STATE_LACP_TIMEOUT))
>> +				port->sm_periodic_state = AD_SLOW_PERIODIC;
>> +			else
>> +				port->sm_periodic_state = AD_FAST_PERIODIC;
>> +		fallthrough;
>> +		case AD_SLOW_PERIODIC:
>> +		case AD_FAST_PERIODIC:
>> +			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
>> +				port->sm_periodic_next_jiffies = jiffies
>> +					+ HZ * AD_SLOW_PERIODIC_TIME;
>> +			else /* AD_FAST_PERIODIC */
>> +				port->sm_periodic_next_jiffies = jiffies
>> +					+ HZ * AD_FAST_PERIODIC_TIME;
>>   		default:
>>   			break;
>>   		}
>> @@ -1987,7 +1970,7 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
>>   		port->sm_rx_state = 0;
>>   		port->sm_rx_timer_counter = 0;
>>   		port->sm_periodic_state = 0;
>> -		port->sm_periodic_timer_counter = 0;
>> +		port->sm_periodic_next_jiffies = 0;
>>   		port->sm_mux_state = 0;
>>   		port->sm_mux_timer_counter = 0;
>>   		port->sm_tx_state = 0;
>> diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
>> index 2053cd8e788a..aabb8c97caf4 100644
>> --- a/include/net/bond_3ad.h
>> +++ b/include/net/bond_3ad.h
>> @@ -227,7 +227,7 @@ typedef struct port {
>>   	rx_states_t sm_rx_state;	/* state machine rx state */
>>   	u16 sm_rx_timer_counter;	/* state machine rx timer counter */
>>   	periodic_states_t sm_periodic_state;	/* state machine periodic state */
>> -	u16 sm_periodic_timer_counter;	/* state machine periodic timer counter */
>> +	unsigned long sm_periodic_next_jiffies;	/* state machine periodic next expected sent */
>>   	mux_states_t sm_mux_state;	/* state machine mux state */
>>   	u16 sm_mux_timer_counter;	/* state machine mux timer counter */
>>   	tx_states_t sm_tx_state;	/* state machine tx state */

---
	-Jay Vosburgh, jv@jvosburgh.net


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-16 15:30   ` Jay Vosburgh
@ 2025-07-16 19:44     ` Carlos Bilbao
  2025-07-16 23:33       ` Jay Vosburgh
  0 siblings, 1 reply; 9+ messages in thread
From: Carlos Bilbao @ 2025-07-16 19:44 UTC (permalink / raw)
  To: Jay Vosburgh, Carlos Bilbao
  Cc: carlos.bilbao, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms, netdev, linux-kernel, sforshee

Hello Jay,

On 7/16/25 10:30, Jay Vosburgh wrote:
> Carlos Bilbao <bilbao@vt.edu> wrote:
>
>> FYI, I was able to test this locally but couldn’t find any kselftests to
>> stress the bonding state machine. If anyone knows of additional ways to
>> test it, I’d be happy to run them.
> 	Your commit message says this change will "help reduce drift
> under contention," but above you say you're unable to stress the state
> machine.
>
> 	How do you induce "drift under contention" to test that your
> patch actually improves something?  What testing has been done to insure
> that the new code doesn't change the behavior in other ways (regressions)?


I tested the bonding driver with and without CPU contention*. With this
patch, the LACPDU state machine is much more consistent under load, with
standard deviation of 0.0065 secs between packets. In comparison, the
current version had a standard deviation of 0.15 secs (~x23 more
variability). I imagine this gets worsens with greater contention.

When I mentioned a possible kselftest (or similar) to "stress" the state
machine, I meant whether there's already any testing that checks the
state machine through different transitions -- e.g., scenarios where the
switch instruct the bond to change configs (for example, between fast and
slow LACP modes), resetting the bond under certain conditions, etc. I just
want to be exhaustive because as you mentioned the state machine has been
around for long time.

*System was stressed using:

stress-ng --cpu $(nproc) --timeout 60

Metrics were collected with:

sudo tcpdump -e -ni <my interface> ether proto 0x8809 and ether src <mac>


>
> 	Without a specific reproducable bug scenario that this change
> fixes, I'm leery of applying such a refactor to code that has seemingly
> been working fine for 20+ years.
>
> 	I gather that what this is intending to do is reduce the current
> dependency on the scheduling accuracy of the workqueue event that runs
> the state machines.  The current implementation works on a "number of
> invocations" basis, assuming that the event is invoked every 100 msec,
> and computes various timeouts based on the number of times the state


Yep.


> machine runs.
>
> 	-J
>
>> Thanks!
>>
>> Carlos
>>
>> On 7/15/25 15:57, carlos.bilbao@kernel.org wrote:
>>> From: Carlos Bilbao <carlos.bilbao@kernel.org>
>>>
>>> Replace the bonding periodic state machine for LACPDU transmission of
>>> function ad_periodic_machine() with a jiffies-based mechanism, which is
>>> more accurate and can help reduce drift under contention.
>>>
>>> Signed-off-by: Carlos Bilbao (DigitalOcean) <carlos.bilbao@kernel.org>
>>> ---
>>>    drivers/net/bonding/bond_3ad.c | 79 +++++++++++++---------------------
>>>    include/net/bond_3ad.h         |  2 +-
>>>    2 files changed, 32 insertions(+), 49 deletions(-)
>>>
>>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>>> index c6807e473ab7..8654a51266a3 100644
>>> --- a/drivers/net/bonding/bond_3ad.c
>>> +++ b/drivers/net/bonding/bond_3ad.c
>>> @@ -1421,44 +1421,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>>>    	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
>>>    	    !bond_params->lacp_active) {
>>>    		port->sm_periodic_state = AD_NO_PERIODIC;
>>> -	}
>>> -	/* check if state machine should change state */
>>> -	else if (port->sm_periodic_timer_counter) {
>>> -		/* check if periodic state machine expired */
>>> -		if (!(--port->sm_periodic_timer_counter)) {
>>> -			/* if expired then do tx */
>>> -			port->sm_periodic_state = AD_PERIODIC_TX;
>>> -		} else {
>>> -			/* If not expired, check if there is some new timeout
>>> -			 * parameter from the partner state
>>> -			 */
>>> -			switch (port->sm_periodic_state) {
>>> -			case AD_FAST_PERIODIC:
>>> -				if (!(port->partner_oper.port_state
>>> -				      & LACP_STATE_LACP_TIMEOUT))
>>> -					port->sm_periodic_state = AD_SLOW_PERIODIC;
>>> -				break;
>>> -			case AD_SLOW_PERIODIC:
>>> -				if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) {
>>> -					port->sm_periodic_timer_counter = 0;
>>> -					port->sm_periodic_state = AD_PERIODIC_TX;
>>> -				}
>>> -				break;
>>> -			default:
>>> -				break;
>>> -			}
>>> -		}
>>> +	} else if (port->sm_periodic_state == AD_NO_PERIODIC)
>>> +		port->sm_periodic_state = AD_FAST_PERIODIC;
>>> +	/* check if periodic state machine expired */
>>> +	else if (time_after_eq(jiffies, port->sm_periodic_next_jiffies)) {
>>> +		/* if expired then do tx */
>>> +		port->sm_periodic_state = AD_PERIODIC_TX;
>>>    	} else {
>>> +		/* If not expired, check if there is some new timeout
>>> +		 * parameter from the partner state
>>> +		 */
>>>    		switch (port->sm_periodic_state) {
>>> -		case AD_NO_PERIODIC:
>>> -			port->sm_periodic_state = AD_FAST_PERIODIC;
>>> -			break;
>>> -		case AD_PERIODIC_TX:
>>> -			if (!(port->partner_oper.port_state &
>>> -			    LACP_STATE_LACP_TIMEOUT))
>>> +		case AD_FAST_PERIODIC:
>>> +			if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
>>>    				port->sm_periodic_state = AD_SLOW_PERIODIC;
>>> -			else
>>> -				port->sm_periodic_state = AD_FAST_PERIODIC;
>>> +			break;
>>> +		case AD_SLOW_PERIODIC:
>>> +			if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
>>> +				port->sm_periodic_state = AD_PERIODIC_TX;
>>>    			break;
>>>    		default:
>>>    			break;
>>> @@ -1471,21 +1451,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>>>    			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
>>>    			  port->actor_port_number, last_state,
>>>    			  port->sm_periodic_state);
>>> +
>>>    		switch (port->sm_periodic_state) {
>>> -		case AD_NO_PERIODIC:
>>> -			port->sm_periodic_timer_counter = 0;
>>> -			break;
>>> -		case AD_FAST_PERIODIC:
>>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
>>> -			break;
>>> -		case AD_SLOW_PERIODIC:
>>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
>>> -			break;
>>>    		case AD_PERIODIC_TX:
>>>    			port->ntt = true;
>>> -			break;
>>> +			if (!(port->partner_oper.port_state &
>>> +						LACP_STATE_LACP_TIMEOUT))
>>> +				port->sm_periodic_state = AD_SLOW_PERIODIC;
>>> +			else
>>> +				port->sm_periodic_state = AD_FAST_PERIODIC;
>>> +		fallthrough;
>>> +		case AD_SLOW_PERIODIC:
>>> +		case AD_FAST_PERIODIC:
>>> +			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
>>> +				port->sm_periodic_next_jiffies = jiffies
>>> +					+ HZ * AD_SLOW_PERIODIC_TIME;
>>> +			else /* AD_FAST_PERIODIC */
>>> +				port->sm_periodic_next_jiffies = jiffies
>>> +					+ HZ * AD_FAST_PERIODIC_TIME;
>>>    		default:
>>>    			break;
>>>    		}
>>> @@ -1987,7 +1970,7 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
>>>    		port->sm_rx_state = 0;
>>>    		port->sm_rx_timer_counter = 0;
>>>    		port->sm_periodic_state = 0;
>>> -		port->sm_periodic_timer_counter = 0;
>>> +		port->sm_periodic_next_jiffies = 0;
>>>    		port->sm_mux_state = 0;
>>>    		port->sm_mux_timer_counter = 0;
>>>    		port->sm_tx_state = 0;
>>> diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
>>> index 2053cd8e788a..aabb8c97caf4 100644
>>> --- a/include/net/bond_3ad.h
>>> +++ b/include/net/bond_3ad.h
>>> @@ -227,7 +227,7 @@ typedef struct port {
>>>    	rx_states_t sm_rx_state;	/* state machine rx state */
>>>    	u16 sm_rx_timer_counter;	/* state machine rx timer counter */
>>>    	periodic_states_t sm_periodic_state;	/* state machine periodic state */
>>> -	u16 sm_periodic_timer_counter;	/* state machine periodic timer counter */
>>> +	unsigned long sm_periodic_next_jiffies;	/* state machine periodic next expected sent */
>>>    	mux_states_t sm_mux_state;	/* state machine mux state */
>>>    	u16 sm_mux_timer_counter;	/* state machine mux timer counter */
>>>    	tx_states_t sm_tx_state;	/* state machine tx state */
> ---
> 	-Jay Vosburgh, jv@jvosburgh.net
>

Thanks,

Carlos


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-16  9:09 ` Simon Horman
@ 2025-07-16 19:45   ` Carlos Bilbao
  0 siblings, 0 replies; 9+ messages in thread
From: Carlos Bilbao @ 2025-07-16 19:45 UTC (permalink / raw)
  To: Simon Horman, carlos.bilbao
  Cc: jv, andrew+netdev, davem, edumazet, kuba, pabeni, netdev,
	linux-kernel, sforshee, bilbao

Hello Simon,

On 7/16/25 04:09, Simon Horman wrote:
> On Tue, Jul 15, 2025 at 03:57:33PM -0500, carlos.bilbao@kernel.org wrote:
>> From: Carlos Bilbao <carlos.bilbao@kernel.org>
>>
>> Replace the bonding periodic state machine for LACPDU transmission of
>> function ad_periodic_machine() with a jiffies-based mechanism, which is
>> more accurate and can help reduce drift under contention.
>>
>> Signed-off-by: Carlos Bilbao (DigitalOcean) <carlos.bilbao@kernel.org>
>> ---
>>   drivers/net/bonding/bond_3ad.c | 79 +++++++++++++---------------------
>>   include/net/bond_3ad.h         |  2 +-
>>   2 files changed, 32 insertions(+), 49 deletions(-)
>>
> Hi Carlos,
>
> Some minor feedback from my side.
>
>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
> ...
>
>> @@ -1471,21 +1451,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>>   			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
>>   			  port->actor_port_number, last_state,
>>   			  port->sm_periodic_state);
>> +
>>   		switch (port->sm_periodic_state) {
>> -		case AD_NO_PERIODIC:
>> -			port->sm_periodic_timer_counter = 0;
>> -			break;
>> -		case AD_FAST_PERIODIC:
>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
>> -			break;
>> -		case AD_SLOW_PERIODIC:
>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
>> -			break;
>>   		case AD_PERIODIC_TX:
>>   			port->ntt = true;
>> -			break;
>> +			if (!(port->partner_oper.port_state &
>> +						LACP_STATE_LACP_TIMEOUT))
>> +				port->sm_periodic_state = AD_SLOW_PERIODIC;
>> +			else
>> +				port->sm_periodic_state = AD_FAST_PERIODIC;
>> +		fallthrough;
> super-nit: maybe one more tab of indentation for the line above.
>
>> +		case AD_SLOW_PERIODIC:
>> +		case AD_FAST_PERIODIC:
>> +			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
>> +				port->sm_periodic_next_jiffies = jiffies
>> +					+ HZ * AD_SLOW_PERIODIC_TIME;
>> +			else /* AD_FAST_PERIODIC */
>> +				port->sm_periodic_next_jiffies = jiffies
>> +					+ HZ * AD_FAST_PERIODIC_TIME;
> Clang 20.1.8 complains that either a break; or fallthrough; should go here.
> For consistency with the code above I'd suggest the latter.


Thank you for the feedback! I’ll be sure to address it in v2, if we decide
to move forward with this.


>
>
>>   		default:
>>   			break;
>>   		}
> ...


Thanks,

Carlos


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-16 19:44     ` Carlos Bilbao
@ 2025-07-16 23:33       ` Jay Vosburgh
  0 siblings, 0 replies; 9+ messages in thread
From: Jay Vosburgh @ 2025-07-16 23:33 UTC (permalink / raw)
  To: Carlos Bilbao
  Cc: Carlos Bilbao, carlos.bilbao, andrew+netdev, davem, edumazet,
	kuba, pabeni, horms, netdev, linux-kernel, sforshee

Carlos Bilbao <carlos.bilbao.osdev@gmail.com> wrote:

>Hello Jay,
>
>On 7/16/25 10:30, Jay Vosburgh wrote:
>> Carlos Bilbao <bilbao@vt.edu> wrote:
>>
>>> FYI, I was able to test this locally but couldn’t find any kselftests to
>>> stress the bonding state machine. If anyone knows of additional ways to
>>> test it, I’d be happy to run them.
>> 	Your commit message says this change will "help reduce drift
>> under contention," but above you say you're unable to stress the state
>> machine.
>>
>> 	How do you induce "drift under contention" to test that your
>> patch actually improves something?  What testing has been done to insure
>> that the new code doesn't change the behavior in other ways (regressions)?
>
>
>I tested the bonding driver with and without CPU contention*. With this
>patch, the LACPDU state machine is much more consistent under load, with
>standard deviation of 0.0065 secs between packets. In comparison, the
>current version had a standard deviation of 0.15 secs (~x23 more
>variability). I imagine this gets worsens with greater contention.

	You're measuring the time between successive LACPDU
transmissions?  So, "perfect" is exactly 1 second between successive
LACPDUs?

	Assuming that's the case, then the results seem odd to me,
perhaps I'm not understanding something, or maybe the standard deviation
isn't the right representation here.  

	If the drift under stress is due to scheduling delays of the
workqueue event that runs bond_3ad_state_machine_handler (which in turn
will call the periodic state machine function, ad_periodic_machine),
then I'd expect to see the inter-LACPDU time vary according to that
scheduling delay.

	For the existing implementation, because it's counter based, the
delays would simply add up, and the expiration of the periodic_timer
(per Figure 6-19 in 802.1AX-2014, the transition from FAST_PERIODIC to
PERIODIC_TX) would be delayed by whatever the sum of the delays is.

	As a hypothetical, the workqueue event normally runs every
100ms.  Suppose the next expiration is set to 10 workqueue events from
now, so one second.  Further suppose that the workqueue event runs 5 ms
late, so 105 ms.  After 10 executions, it will be at 1050 ms from the
start time, and the periodic_timer would expire 50 ms late.

	For the jiffies based implementation proposed here, I'd expect
the same behavior up to a point, due to the granularity of the workqueue
event at 100ms.

	Doing the same hypothetical, the workqueue event normally runs
every 100ms, and the next periodic_timer expiration is set to now + 1
second, in jiffies.  If the workqueue event runs 5 ms late again, after
9 executions, it will be at 945 ms from the start time, so it won't
expire the timer.  The next execution would be at 1050 ms, and the timer
would then expire, but it's still 50 ms late.

	So, in your tests, do we know what the actual scheduling delays
are, as well as the distribution of the times you've measured?

	I'm willing to believe that your proposal may be better, but I'm
not understanding why from the data you've shared (0.0065 vs 0.15).

	-J

>When I mentioned a possible kselftest (or similar) to "stress" the state
>machine, I meant whether there's already any testing that checks the
>state machine through different transitions -- e.g., scenarios where the
>switch instruct the bond to change configs (for example, between fast and
>slow LACP modes), resetting the bond under certain conditions, etc. I just
>want to be exhaustive because as you mentioned the state machine has been
>around for long time.
>
>*System was stressed using:
>
>stress-ng --cpu $(nproc) --timeout 60
>
>Metrics were collected with:
>
>sudo tcpdump -e -ni <my interface> ether proto 0x8809 and ether src <mac>
>
>
>>
>> 	Without a specific reproducable bug scenario that this change
>> fixes, I'm leery of applying such a refactor to code that has seemingly
>> been working fine for 20+ years.
>>
>> 	I gather that what this is intending to do is reduce the current
>> dependency on the scheduling accuracy of the workqueue event that runs
>> the state machines.  The current implementation works on a "number of
>> invocations" basis, assuming that the event is invoked every 100 msec,
>> and computes various timeouts based on the number of times the state
>
>
>Yep.
>
>
>> machine runs.
>>
>> 	-J
>>
>>> Thanks!
>>>
>>> Carlos
>>>
>>> On 7/15/25 15:57, carlos.bilbao@kernel.org wrote:
>>>> From: Carlos Bilbao <carlos.bilbao@kernel.org>
>>>>
>>>> Replace the bonding periodic state machine for LACPDU transmission of
>>>> function ad_periodic_machine() with a jiffies-based mechanism, which is
>>>> more accurate and can help reduce drift under contention.
>>>>
>>>> Signed-off-by: Carlos Bilbao (DigitalOcean) <carlos.bilbao@kernel.org>
>>>> ---
>>>>    drivers/net/bonding/bond_3ad.c | 79 +++++++++++++---------------------
>>>>    include/net/bond_3ad.h         |  2 +-
>>>>    2 files changed, 32 insertions(+), 49 deletions(-)
>>>>
>>>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>>>> index c6807e473ab7..8654a51266a3 100644
>>>> --- a/drivers/net/bonding/bond_3ad.c
>>>> +++ b/drivers/net/bonding/bond_3ad.c
>>>> @@ -1421,44 +1421,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>>>>    	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
>>>>    	    !bond_params->lacp_active) {
>>>>    		port->sm_periodic_state = AD_NO_PERIODIC;
>>>> -	}
>>>> -	/* check if state machine should change state */
>>>> -	else if (port->sm_periodic_timer_counter) {
>>>> -		/* check if periodic state machine expired */
>>>> -		if (!(--port->sm_periodic_timer_counter)) {
>>>> -			/* if expired then do tx */
>>>> -			port->sm_periodic_state = AD_PERIODIC_TX;
>>>> -		} else {
>>>> -			/* If not expired, check if there is some new timeout
>>>> -			 * parameter from the partner state
>>>> -			 */
>>>> -			switch (port->sm_periodic_state) {
>>>> -			case AD_FAST_PERIODIC:
>>>> -				if (!(port->partner_oper.port_state
>>>> -				      & LACP_STATE_LACP_TIMEOUT))
>>>> -					port->sm_periodic_state = AD_SLOW_PERIODIC;
>>>> -				break;
>>>> -			case AD_SLOW_PERIODIC:
>>>> -				if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) {
>>>> -					port->sm_periodic_timer_counter = 0;
>>>> -					port->sm_periodic_state = AD_PERIODIC_TX;
>>>> -				}
>>>> -				break;
>>>> -			default:
>>>> -				break;
>>>> -			}
>>>> -		}
>>>> +	} else if (port->sm_periodic_state == AD_NO_PERIODIC)
>>>> +		port->sm_periodic_state = AD_FAST_PERIODIC;
>>>> +	/* check if periodic state machine expired */
>>>> +	else if (time_after_eq(jiffies, port->sm_periodic_next_jiffies)) {
>>>> +		/* if expired then do tx */
>>>> +		port->sm_periodic_state = AD_PERIODIC_TX;
>>>>    	} else {
>>>> +		/* If not expired, check if there is some new timeout
>>>> +		 * parameter from the partner state
>>>> +		 */
>>>>    		switch (port->sm_periodic_state) {
>>>> -		case AD_NO_PERIODIC:
>>>> -			port->sm_periodic_state = AD_FAST_PERIODIC;
>>>> -			break;
>>>> -		case AD_PERIODIC_TX:
>>>> -			if (!(port->partner_oper.port_state &
>>>> -			    LACP_STATE_LACP_TIMEOUT))
>>>> +		case AD_FAST_PERIODIC:
>>>> +			if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
>>>>    				port->sm_periodic_state = AD_SLOW_PERIODIC;
>>>> -			else
>>>> -				port->sm_periodic_state = AD_FAST_PERIODIC;
>>>> +			break;
>>>> +		case AD_SLOW_PERIODIC:
>>>> +			if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
>>>> +				port->sm_periodic_state = AD_PERIODIC_TX;
>>>>    			break;
>>>>    		default:
>>>>    			break;
>>>> @@ -1471,21 +1451,24 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
>>>>    			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
>>>>    			  port->actor_port_number, last_state,
>>>>    			  port->sm_periodic_state);
>>>> +
>>>>    		switch (port->sm_periodic_state) {
>>>> -		case AD_NO_PERIODIC:
>>>> -			port->sm_periodic_timer_counter = 0;
>>>> -			break;
>>>> -		case AD_FAST_PERIODIC:
>>>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>>>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;
>>>> -			break;
>>>> -		case AD_SLOW_PERIODIC:
>>>> -			/* decrement 1 tick we lost in the PERIODIC_TX cycle */
>>>> -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;
>>>> -			break;
>>>>    		case AD_PERIODIC_TX:
>>>>    			port->ntt = true;
>>>> -			break;
>>>> +			if (!(port->partner_oper.port_state &
>>>> +						LACP_STATE_LACP_TIMEOUT))
>>>> +				port->sm_periodic_state = AD_SLOW_PERIODIC;
>>>> +			else
>>>> +				port->sm_periodic_state = AD_FAST_PERIODIC;
>>>> +		fallthrough;
>>>> +		case AD_SLOW_PERIODIC:
>>>> +		case AD_FAST_PERIODIC:
>>>> +			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
>>>> +				port->sm_periodic_next_jiffies = jiffies
>>>> +					+ HZ * AD_SLOW_PERIODIC_TIME;
>>>> +			else /* AD_FAST_PERIODIC */
>>>> +				port->sm_periodic_next_jiffies = jiffies
>>>> +					+ HZ * AD_FAST_PERIODIC_TIME;
>>>>    		default:
>>>>    			break;
>>>>    		}
>>>> @@ -1987,7 +1970,7 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
>>>>    		port->sm_rx_state = 0;
>>>>    		port->sm_rx_timer_counter = 0;
>>>>    		port->sm_periodic_state = 0;
>>>> -		port->sm_periodic_timer_counter = 0;
>>>> +		port->sm_periodic_next_jiffies = 0;
>>>>    		port->sm_mux_state = 0;
>>>>    		port->sm_mux_timer_counter = 0;
>>>>    		port->sm_tx_state = 0;
>>>> diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
>>>> index 2053cd8e788a..aabb8c97caf4 100644
>>>> --- a/include/net/bond_3ad.h
>>>> +++ b/include/net/bond_3ad.h
>>>> @@ -227,7 +227,7 @@ typedef struct port {
>>>>    	rx_states_t sm_rx_state;	/* state machine rx state */
>>>>    	u16 sm_rx_timer_counter;	/* state machine rx timer counter */
>>>>    	periodic_states_t sm_periodic_state;	/* state machine periodic state */
>>>> -	u16 sm_periodic_timer_counter;	/* state machine periodic timer counter */
>>>> +	unsigned long sm_periodic_next_jiffies;	/* state machine periodic next expected sent */
>>>>    	mux_states_t sm_mux_state;	/* state machine mux state */
>>>>    	u16 sm_mux_timer_counter;	/* state machine mux timer counter */
>>>>    	tx_states_t sm_tx_state;	/* state machine tx state */
>> ---
>> 	-Jay Vosburgh, jv@jvosburgh.net
>>
>
>Thanks,
>
>Carlos
>

---
	-Jay Vosburgh, jv@jvosburgh.net

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-15 20:57 [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies carlos.bilbao
  2025-07-15 20:59 ` Carlos Bilbao
  2025-07-16  9:09 ` Simon Horman
@ 2025-07-17  4:35 ` kernel test robot
  2 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2025-07-17  4:35 UTC (permalink / raw)
  To: carlos.bilbao, jv, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms, netdev, linux-kernel
  Cc: llvm, oe-kbuild-all, sforshee, bilbao, Carlos Bilbao

Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]
[also build test WARNING on net/main linus/master v6.16-rc6 next-20250716]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/carlos-bilbao-kernel-org/bonding-Switch-periodic-LACPDU-state-machine-from-counter-to-jiffies/20250716-045912
base:   net-next/main
patch link:    https://lore.kernel.org/r/20250715205733.50911-1-carlos.bilbao%40kernel.org
patch subject: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
config: i386-randconfig-017-20250717 (https://download.01.org/0day-ci/archive/20250717/202507171204.xrYX6hPj-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250717/202507171204.xrYX6hPj-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507171204.xrYX6hPj-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/net/bonding/bond_3ad.c:1484:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
    1484 |                 default:
         |                 ^
   drivers/net/bonding/bond_3ad.c:1484:3: note: insert 'break;' to avoid fall-through
    1484 |                 default:
         |                 ^
         |                 break; 
   1 warning generated.


vim +1484 drivers/net/bonding/bond_3ad.c

^1da177e4c3f41 Linus Torvalds      2005-04-16  1416  
^1da177e4c3f41 Linus Torvalds      2005-04-16  1417  /**
^1da177e4c3f41 Linus Torvalds      2005-04-16  1418   * ad_periodic_machine - handle a port's periodic state machine
^1da177e4c3f41 Linus Torvalds      2005-04-16  1419   * @port: the port we're looking at
3a755cd8b7c601 Hangbin Liu         2021-08-02  1420   * @bond_params: bond parameters we will use
^1da177e4c3f41 Linus Torvalds      2005-04-16  1421   *
^1da177e4c3f41 Linus Torvalds      2005-04-16  1422   * Turn ntt flag on priodically to perform periodic transmission of lacpdu's.
^1da177e4c3f41 Linus Torvalds      2005-04-16  1423   */
bbef56d861f103 Colin Ian King      2021-09-07  1424  static void ad_periodic_machine(struct port *port, struct bond_params *bond_params)
^1da177e4c3f41 Linus Torvalds      2005-04-16  1425  {
^1da177e4c3f41 Linus Torvalds      2005-04-16  1426  	periodic_states_t last_state;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1427  
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1428  	/* keep current state machine state to compare later if it was changed */
^1da177e4c3f41 Linus Torvalds      2005-04-16  1429  	last_state = port->sm_periodic_state;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1430  
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1431  	/* check if port was reinitialized */
^1da177e4c3f41 Linus Torvalds      2005-04-16  1432  	if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
3a755cd8b7c601 Hangbin Liu         2021-08-02  1433  	    (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
bbef56d861f103 Colin Ian King      2021-09-07  1434  	    !bond_params->lacp_active) {
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1435  		port->sm_periodic_state = AD_NO_PERIODIC;
a117d493f00ee2 Carlos Bilbao       2025-07-15  1436  	} else if (port->sm_periodic_state == AD_NO_PERIODIC)
a117d493f00ee2 Carlos Bilbao       2025-07-15  1437  		port->sm_periodic_state = AD_FAST_PERIODIC;
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1438  	/* check if periodic state machine expired */
a117d493f00ee2 Carlos Bilbao       2025-07-15  1439  	else if (time_after_eq(jiffies, port->sm_periodic_next_jiffies)) {
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1440  		/* if expired then do tx */
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1441  		port->sm_periodic_state = AD_PERIODIC_TX;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1442  	} else {
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1443  		/* If not expired, check if there is some new timeout
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1444  		 * parameter from the partner state
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1445  		 */
^1da177e4c3f41 Linus Torvalds      2005-04-16  1446  		switch (port->sm_periodic_state) {
^1da177e4c3f41 Linus Torvalds      2005-04-16  1447  		case AD_FAST_PERIODIC:
a117d493f00ee2 Carlos Bilbao       2025-07-15  1448  			if (!(port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1449  				port->sm_periodic_state = AD_SLOW_PERIODIC;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1450  			break;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1451  		case AD_SLOW_PERIODIC:
a117d493f00ee2 Carlos Bilbao       2025-07-15  1452  			if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT))
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1453  				port->sm_periodic_state = AD_PERIODIC_TX;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1454  			break;
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1455  		default:
^1da177e4c3f41 Linus Torvalds      2005-04-16  1456  			break;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1457  		}
^1da177e4c3f41 Linus Torvalds      2005-04-16  1458  	}
^1da177e4c3f41 Linus Torvalds      2005-04-16  1459  
3bf2d28a2d7112 Veaceslav Falico    2014-01-08  1460  	/* check if the state machine was changed */
^1da177e4c3f41 Linus Torvalds      2005-04-16  1461  	if (port->sm_periodic_state != last_state) {
17720981964ac5 Jarod Wilson        2019-06-07  1462  		slave_dbg(port->slave->bond->dev, port->slave->dev,
17720981964ac5 Jarod Wilson        2019-06-07  1463  			  "Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",
a4aee5c808fc5b Joe Perches         2009-12-13  1464  			  port->actor_port_number, last_state,
a4aee5c808fc5b Joe Perches         2009-12-13  1465  			  port->sm_periodic_state);
a117d493f00ee2 Carlos Bilbao       2025-07-15  1466  
^1da177e4c3f41 Linus Torvalds      2005-04-16  1467  		switch (port->sm_periodic_state) {
^1da177e4c3f41 Linus Torvalds      2005-04-16  1468  		case AD_PERIODIC_TX:
d238d458a70ad1 Holger Eitzenberger 2008-12-26  1469  			port->ntt = true;
a117d493f00ee2 Carlos Bilbao       2025-07-15  1470  			if (!(port->partner_oper.port_state &
a117d493f00ee2 Carlos Bilbao       2025-07-15  1471  						LACP_STATE_LACP_TIMEOUT))
a117d493f00ee2 Carlos Bilbao       2025-07-15  1472  				port->sm_periodic_state = AD_SLOW_PERIODIC;
a117d493f00ee2 Carlos Bilbao       2025-07-15  1473  			else
a117d493f00ee2 Carlos Bilbao       2025-07-15  1474  				port->sm_periodic_state = AD_FAST_PERIODIC;
a117d493f00ee2 Carlos Bilbao       2025-07-15  1475  		fallthrough;
a117d493f00ee2 Carlos Bilbao       2025-07-15  1476  		case AD_SLOW_PERIODIC:
a117d493f00ee2 Carlos Bilbao       2025-07-15  1477  		case AD_FAST_PERIODIC:
a117d493f00ee2 Carlos Bilbao       2025-07-15  1478  			if (port->sm_periodic_state == AD_SLOW_PERIODIC)
a117d493f00ee2 Carlos Bilbao       2025-07-15  1479  				port->sm_periodic_next_jiffies = jiffies
a117d493f00ee2 Carlos Bilbao       2025-07-15  1480  					+ HZ * AD_SLOW_PERIODIC_TIME;
a117d493f00ee2 Carlos Bilbao       2025-07-15  1481  			else /* AD_FAST_PERIODIC */
a117d493f00ee2 Carlos Bilbao       2025-07-15  1482  				port->sm_periodic_next_jiffies = jiffies
a117d493f00ee2 Carlos Bilbao       2025-07-15  1483  					+ HZ * AD_FAST_PERIODIC_TIME;
3bf2d28a2d7112 Veaceslav Falico    2014-01-08 @1484  		default:
^1da177e4c3f41 Linus Torvalds      2005-04-16  1485  			break;
^1da177e4c3f41 Linus Torvalds      2005-04-16  1486  		}
^1da177e4c3f41 Linus Torvalds      2005-04-16  1487  	}
^1da177e4c3f41 Linus Torvalds      2005-04-16  1488  }
^1da177e4c3f41 Linus Torvalds      2005-04-16  1489  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies
  2025-07-15 20:59 ` Carlos Bilbao
  2025-07-16 15:30   ` Jay Vosburgh
@ 2025-08-05 13:14   ` Hangbin Liu
  1 sibling, 0 replies; 9+ messages in thread
From: Hangbin Liu @ 2025-08-05 13:14 UTC (permalink / raw)
  To: Carlos Bilbao
  Cc: carlos.bilbao, jv, andrew+netdev, davem, edumazet, kuba, pabeni,
	horms, netdev, linux-kernel, sforshee

On Tue, Jul 15, 2025 at 03:59:39PM -0500, Carlos Bilbao wrote:
> FYI, I was able to test this locally but couldn’t find any kselftests to
> stress the bonding state machine. If anyone knows of additional ways to
> test it, I’d be happy to run them.

Hi Carlos,

I have wrote a tool[1] to do lacp simulation and state injection. Feel free to
try it and open feature requests.

[1] lacpd: https://github.com/liuhangbin/lacpd

Thanks
Hangbin

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-08-05 13:14 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-15 20:57 [PATCH] bonding: Switch periodic LACPDU state machine from counter to jiffies carlos.bilbao
2025-07-15 20:59 ` Carlos Bilbao
2025-07-16 15:30   ` Jay Vosburgh
2025-07-16 19:44     ` Carlos Bilbao
2025-07-16 23:33       ` Jay Vosburgh
2025-08-05 13:14   ` Hangbin Liu
2025-07-16  9:09 ` Simon Horman
2025-07-16 19:45   ` Carlos Bilbao
2025-07-17  4:35 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).