Intel-Wired-Lan Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-wired-lan] [PATCH iwl-net v2] ice: Fix call trace when rebuild fails
@ 2023-09-08  8:59 Mateusz Palczewski
  2023-09-12 22:05 ` Tony Nguyen
  0 siblings, 1 reply; 3+ messages in thread
From: Mateusz Palczewski @ 2023-09-08  8:59 UTC (permalink / raw)
  To: intel-wired-lan

In case rebuild fails trying to restart or unload a driver lead to call
trace.

    [  128.876458] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
    [  128.884417] PGD 33510d067 P4D 0
    [  128.887776] Oops: 0000 [#1] SMP NOPTI
    [  128.891571] CPU: 6 PID: 2141 Comm: ethtool Kdump: loaded Not tainted 4.18.0-372.34.1.el8_6.ice.2117361.repr.x86_64 #1
    [  128.902308] Hardware name: ACCTON BRIGHTONCITY/BRIGHTONCITY, BIOS IDVLCRB1.86B.0021.D40.2112020610 12/02/2021
    [  128.912351] RIP: 0010:ice_vsi_setup_tx_rings+0x26/0x80 [ice]
    [  128.918163] Code: 00 0f 1f 00 0f 1f 44 00 00 41 54 55 53 66 83 bf da 03 00 00 00 48 89 fb 0f 84 c9 38 05 00 48 8b 47 28 41 bc 08 00 00 00 31 ed <48> 8b 38 48 85 ff 75 21 eb 3d 0f b7 93 da 03 00 00 83 c5 01 39 ea
    [  128.937060] RSP: 0018:ff78a8b908183a10 EFLAGS: 00010246
    [  128.942417] RAX: 0000000000000000 RBX: ff4a4174c104e818 RCX: 0000000000000a50
    [  128.949683] RDX: 0000000000000a4f RSI: 641269b8ad24144d RDI: ff4a4174c104e818
    [  128.956946] RBP: 0000000000000000 R08: 0000000080000000 R09: ff4a4173241b9b30
    [  128.964208] R10: ff4a4173241b9b30 R11: 0000000000000000 R12: 0000000000000008
    [  128.971475] R13: ff4a417570650180 R14: 0000000000000001 R15: 0000000000000001
    [  128.978741] FS:  00007f18a268e740(0000) GS:ff4a41823ff80000(0000) knlGS:0000000000000000
    [  128.986961] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [  128.992837] CR2: 0000000000000000 CR3: 0000000109e8e003 CR4: 0000000000771ee0
    [  129.000103] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    [  129.007368] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    [  129.014635] PKRU: 55555554
    [  129.017473] Call Trace:
    [  129.020054]  ice_vsi_open+0x29/0x120 [ice]
    [  129.024292]  ice_vsi_recfg_qs+0xaa/0x110 [ice]
    [  129.028874]  ice_set_channels+0x18d/0x3d0 [ice]
    [  129.033549]  ethnl_set_channels+0x3a1/0x410
    [  129.037865]  genl_family_rcv_msg_doit.isra.17+0x113/0x150
    [  129.043394]  genl_family_rcv_msg+0xb7/0x170
    [  129.047708]  ? channels_fill_reply+0x1a0/0x1a0
    [  129.052282]  genl_rcv_msg+0x47/0xa0
    [  129.055901]  ? genl_family_rcv_msg+0x170/0x170

Fix this by setting ICE_RESET_FAILED when rebuild fails and
clearing number of qs when rings are free.

Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
---
 v2: Add dev_err message about rebuild fail
---
 drivers/net/ethernet/intel/ice/ice_lib.c  | 7 +++++++
 drivers/net/ethernet/intel/ice/ice_main.c | 4 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0054d7e64ec3..09190457113a 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -321,6 +321,9 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 
 	dev = ice_pf_to_dev(pf);
 
+	vsi->num_txq = 0;
+	vsi->num_rxq = 0;
+
 	bitmap_free(vsi->af_xdp_zc_qps);
 	vsi->af_xdp_zc_qps = NULL;
 	/* free the ring and vector containers */
@@ -3157,6 +3160,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
 	struct ice_coalesce_stored *coalesce;
 	int ret, prev_txq, prev_rxq;
 	int prev_num_q_vectors = 0;
+	struct device *dev;
 	struct ice_pf *pf;
 
 	if (!vsi)
@@ -3166,6 +3170,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
 	params.flags = vsi_flags;
 
 	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
 	if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
 		return -EINVAL;
 
@@ -3206,6 +3211,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
 	ice_vsi_decfg(vsi);
 err_vsi_cfg:
 	kfree(coalesce);
+	set_bit(ICE_RESET_FAILED, pf->state);
+	dev_err(dev, "Rebuild failed, unload and reload driver\n");
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index b40dfe6ae321..becf7f0fcd4c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4232,7 +4232,7 @@ static void ice_deinit_fdir(struct ice_pf *pf)
 {
 	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
 
-	if (!vsi)
+	if (!vsi || test_bit(ICE_RESET_FAILED, pf->state))
 		return;
 
 	ice_vsi_manage_fdir(vsi, false);
@@ -4746,7 +4746,7 @@ static void ice_deinit_pf_sw(struct ice_pf *pf)
 {
 	struct ice_vsi *vsi = ice_get_main_vsi(pf);
 
-	if (!vsi)
+	if (!vsi || test_bit(ICE_RESET_FAILED, pf->state))
 		return;
 
 	ice_vsi_release(vsi);
-- 
2.31.1

_______________________________________________
Intel-wired-lan mailing list
Intel-wired-lan@osuosl.org
https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Intel-wired-lan] [PATCH iwl-net v2] ice: Fix call trace when rebuild fails
  2023-09-08  8:59 [Intel-wired-lan] [PATCH iwl-net v2] ice: Fix call trace when rebuild fails Mateusz Palczewski
@ 2023-09-12 22:05 ` Tony Nguyen
  2023-09-20 12:19   ` Palczewski, Mateusz
  0 siblings, 1 reply; 3+ messages in thread
From: Tony Nguyen @ 2023-09-12 22:05 UTC (permalink / raw)
  To: Mateusz Palczewski, intel-wired-lan, Michal Swiatkowski



On 9/8/2023 1:59 AM, Mateusz Palczewski wrote:
> In case rebuild fails trying to restart or unload a driver lead to call
> trace.
> 
>      [  128.876458] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
>      [  128.884417] PGD 33510d067 P4D 0
>      [  128.887776] Oops: 0000 [#1] SMP NOPTI
>      [  128.891571] CPU: 6 PID: 2141 Comm: ethtool Kdump: loaded Not tainted 4.18.0-372.34.1.el8_6.ice.2117361.repr.x86_64 #1
>      [  128.902308] Hardware name: ACCTON BRIGHTONCITY/BRIGHTONCITY, BIOS IDVLCRB1.86B.0021.D40.2112020610 12/02/2021
>      [  128.912351] RIP: 0010:ice_vsi_setup_tx_rings+0x26/0x80 [ice]
>      [  128.918163] Code: 00 0f 1f 00 0f 1f 44 00 00 41 54 55 53 66 83 bf da 03 00 00 00 48 89 fb 0f 84 c9 38 05 00 48 8b 47 28 41 bc 08 00 00 00 31 ed <48> 8b 38 48 85 ff 75 21 eb 3d 0f b7 93 da 03 00 00 83 c5 01 39 ea
>      [  128.937060] RSP: 0018:ff78a8b908183a10 EFLAGS: 00010246
>      [  128.942417] RAX: 0000000000000000 RBX: ff4a4174c104e818 RCX: 0000000000000a50
>      [  128.949683] RDX: 0000000000000a4f RSI: 641269b8ad24144d RDI: ff4a4174c104e818
>      [  128.956946] RBP: 0000000000000000 R08: 0000000080000000 R09: ff4a4173241b9b30
>      [  128.964208] R10: ff4a4173241b9b30 R11: 0000000000000000 R12: 0000000000000008
>      [  128.971475] R13: ff4a417570650180 R14: 0000000000000001 R15: 0000000000000001
>      [  128.978741] FS:  00007f18a268e740(0000) GS:ff4a41823ff80000(0000) knlGS:0000000000000000
>      [  128.986961] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>      [  128.992837] CR2: 0000000000000000 CR3: 0000000109e8e003 CR4: 0000000000771ee0
>      [  129.000103] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>      [  129.007368] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>      [  129.014635] PKRU: 55555554
>      [  129.017473] Call Trace:
>      [  129.020054]  ice_vsi_open+0x29/0x120 [ice]
>      [  129.024292]  ice_vsi_recfg_qs+0xaa/0x110 [ice]
>      [  129.028874]  ice_set_channels+0x18d/0x3d0 [ice]
>      [  129.033549]  ethnl_set_channels+0x3a1/0x410
>      [  129.037865]  genl_family_rcv_msg_doit.isra.17+0x113/0x150
>      [  129.043394]  genl_family_rcv_msg+0xb7/0x170
>      [  129.047708]  ? channels_fill_reply+0x1a0/0x1a0
>      [  129.052282]  genl_rcv_msg+0x47/0xa0
>      [  129.055901]  ? genl_family_rcv_msg+0x170/0x170
> 
> Fix this by setting ICE_RESET_FAILED when rebuild fails and
> clearing number of qs when rings are free.

What's the situation leading to the call trace? Do you know why we are 
failing rebuild?

> Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
> Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
> Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
> ---
>   v2: Add dev_err message about rebuild fail
> ---
>   drivers/net/ethernet/intel/ice/ice_lib.c  | 7 +++++++
>   drivers/net/ethernet/intel/ice/ice_main.c | 4 ++--
>   2 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
> index 0054d7e64ec3..09190457113a 100644
> --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> @@ -321,6 +321,9 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
>   
>   	dev = ice_pf_to_dev(pf);
>   
> +	vsi->num_txq = 0;
> +	vsi->num_rxq = 0;
> +
>   	bitmap_free(vsi->af_xdp_zc_qps);
>   	vsi->af_xdp_zc_qps = NULL;
>   	/* free the ring and vector containers */
> @@ -3157,6 +3160,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
>   	struct ice_coalesce_stored *coalesce;
>   	int ret, prev_txq, prev_rxq;
>   	int prev_num_q_vectors = 0;
> +	struct device *dev;
>   	struct ice_pf *pf;
>   
>   	if (!vsi)
> @@ -3166,6 +3170,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
>   	params.flags = vsi_flags;
>   
>   	pf = vsi->back;
> +	dev = ice_pf_to_dev(pf);
>   	if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
>   		return -EINVAL;
>   
> @@ -3206,6 +3211,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
>   	ice_vsi_decfg(vsi);
>   err_vsi_cfg:
>   	kfree(coalesce);
> +	set_bit(ICE_RESET_FAILED, pf->state);
> +	dev_err(dev, "Rebuild failed, unload and reload driver\n");

How is this reproduced? How common is it? Forcing reload seems pretty 
disruptive.

Also, no need for a local var; use ice_pf_to_dev() in the call.

>   	return ret;
>   }
>   
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
> index b40dfe6ae321..becf7f0fcd4c 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -4232,7 +4232,7 @@ static void ice_deinit_fdir(struct ice_pf *pf)
>   {
>   	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
>   
> -	if (!vsi)
> +	if (!vsi || test_bit(ICE_RESET_FAILED, pf->state))
>   		return;
>   
>   	ice_vsi_manage_fdir(vsi, false);
> @@ -4746,7 +4746,7 @@ static void ice_deinit_pf_sw(struct ice_pf *pf)
>   {
>   	struct ice_vsi *vsi = ice_get_main_vsi(pf);
>   
> -	if (!vsi)
> +	if (!vsi || test_bit(ICE_RESET_FAILED, pf->state))
>   		return;
>   
>   	ice_vsi_release(vsi);
_______________________________________________
Intel-wired-lan mailing list
Intel-wired-lan@osuosl.org
https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Intel-wired-lan] [PATCH iwl-net v2] ice: Fix call trace when rebuild fails
  2023-09-12 22:05 ` Tony Nguyen
@ 2023-09-20 12:19   ` Palczewski, Mateusz
  0 siblings, 0 replies; 3+ messages in thread
From: Palczewski, Mateusz @ 2023-09-20 12:19 UTC (permalink / raw)
  To: Nguyen, Anthony L; +Cc: Michal Swiatkowski, intel-wired-lan@lists.osuosl.org


>On 9/8/2023 1:59 AM, Mateusz Palczewski wrote:
>> In case rebuild fails trying to restart or unload a driver lead to 
>> call trace.
>> 
>>      [  128.876458] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
>>      [  128.884417] PGD 33510d067 P4D 0
>>      [  128.887776] Oops: 0000 [#1] SMP NOPTI
>>      [  128.891571] CPU: 6 PID: 2141 Comm: ethtool Kdump: loaded Not tainted 4.18.0-372.34.1.el8_6.ice.2117361.repr.x86_64 #1
>>      [  128.902308] Hardware name: ACCTON BRIGHTONCITY/BRIGHTONCITY, BIOS IDVLCRB1.86B.0021.D40.2112020610 12/02/2021
>>      [  128.912351] RIP: 0010:ice_vsi_setup_tx_rings+0x26/0x80 [ice]
>>      [  128.918163] Code: 00 0f 1f 00 0f 1f 44 00 00 41 54 55 53 66 83 bf da 03 00 00 00 48 89 fb 0f 84 c9 38 05 00 48 8b 47 28 41 bc 08 00 00 00 31 ed <48> 8b 38 48 85 ff 75 21 eb 3d 0f b7 93 da 03 00 00 83 c5 01 39 ea
>>      [  128.937060] RSP: 0018:ff78a8b908183a10 EFLAGS: 00010246
>>      [  128.942417] RAX: 0000000000000000 RBX: ff4a4174c104e818 RCX: 0000000000000a50
>>      [  128.949683] RDX: 0000000000000a4f RSI: 641269b8ad24144d RDI: ff4a4174c104e818
>>      [  128.956946] RBP: 0000000000000000 R08: 0000000080000000 R09: ff4a4173241b9b30
>>      [  128.964208] R10: ff4a4173241b9b30 R11: 0000000000000000 R12: 0000000000000008
>>      [  128.971475] R13: ff4a417570650180 R14: 0000000000000001 R15: 0000000000000001
>>      [  128.978741] FS:  00007f18a268e740(0000) GS:ff4a41823ff80000(0000) knlGS:0000000000000000
>>      [  128.986961] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>      [  128.992837] CR2: 0000000000000000 CR3: 0000000109e8e003 CR4: 0000000000771ee0
>>      [  129.000103] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>>      [  129.007368] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>>      [  129.014635] PKRU: 55555554
>>      [  129.017473] Call Trace:
>>      [  129.020054]  ice_vsi_open+0x29/0x120 [ice]
>>      [  129.024292]  ice_vsi_recfg_qs+0xaa/0x110 [ice]
>>      [  129.028874]  ice_set_channels+0x18d/0x3d0 [ice]
>>      [  129.033549]  ethnl_set_channels+0x3a1/0x410
>>      [  129.037865]  genl_family_rcv_msg_doit.isra.17+0x113/0x150
>>      [  129.043394]  genl_family_rcv_msg+0xb7/0x170
>>      [  129.047708]  ? channels_fill_reply+0x1a0/0x1a0
>>      [  129.052282]  genl_rcv_msg+0x47/0xa0
>>      [  129.055901]  ? genl_family_rcv_msg+0x170/0x170
>> 
>> Fix this by setting ICE_RESET_FAILED when rebuild fails and clearing 
>> number of qs when rings are free.
>
>What's the situation leading to the call trace? Do you know why we are failing rebuild?

It is manually done by injecting an error condition during queues allocation by using module parameter.

>
>> Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller 
>> functions")
>> Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
>> Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
>> ---
>>   v2: Add dev_err message about rebuild fail
>> ---
>>   drivers/net/ethernet/intel/ice/ice_lib.c  | 7 +++++++
>>   drivers/net/ethernet/intel/ice/ice_main.c | 4 ++--
>>   2 files changed, 9 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c 
>> b/drivers/net/ethernet/intel/ice/ice_lib.c
>> index 0054d7e64ec3..09190457113a 100644
>> --- a/drivers/net/ethernet/intel/ice/ice_lib.c
>> +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
>> @@ -321,6 +321,9 @@ static void ice_vsi_free_arrays(struct ice_vsi 
>> *vsi)
>>   
>>   	dev = ice_pf_to_dev(pf);
>>   
>> +	vsi->num_txq = 0;
>> +	vsi->num_rxq = 0;
>> +
>>   	bitmap_free(vsi->af_xdp_zc_qps);
>>   	vsi->af_xdp_zc_qps = NULL;
>>   	/* free the ring and vector containers */ @@ -3157,6 +3160,7 @@ int 
>> ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
>>   	struct ice_coalesce_stored *coalesce;
>>   	int ret, prev_txq, prev_rxq;
>>   	int prev_num_q_vectors = 0;
>> +	struct device *dev;
>>   	struct ice_pf *pf;
>>   
>>   	if (!vsi)
>> @@ -3166,6 +3170,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
>>   	params.flags = vsi_flags;
>>   
>>   	pf = vsi->back;
>> +	dev = ice_pf_to_dev(pf);
>>   	if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
>>   		return -EINVAL;
>>   
>> @@ -3206,6 +3211,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
>>   	ice_vsi_decfg(vsi);
>>   err_vsi_cfg:
>>   	kfree(coalesce);
>> +	set_bit(ICE_RESET_FAILED, pf->state);
>> +	dev_err(dev, "Rebuild failed, unload and reload driver\n");
>
>How is this reproduced? How common is it? Forcing reload seems pretty disruptive.

As mentioned above it is done by manual error injection and was reported by a customer. 

>
>Also, no need for a local var; use ice_pf_to_dev() in the call.

Sure, will change that in v3. 

>
>>   	return ret;
>>   }
>>   
>> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c 
>> b/drivers/net/ethernet/intel/ice/ice_main.c
>> index b40dfe6ae321..becf7f0fcd4c 100644
>> --- a/drivers/net/ethernet/intel/ice/ice_main.c
>> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
>> @@ -4232,7 +4232,7 @@ static void ice_deinit_fdir(struct ice_pf *pf)
>>   {
>>   	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
>>   
>> -	if (!vsi)
>> +	if (!vsi || test_bit(ICE_RESET_FAILED, pf->state))
>>   		return;
>>   
>>   	ice_vsi_manage_fdir(vsi, false);
>> @@ -4746,7 +4746,7 @@ static void ice_deinit_pf_sw(struct ice_pf *pf)
>>   {
>>   	struct ice_vsi *vsi = ice_get_main_vsi(pf);
>>   
>> -	if (!vsi)
>> +	if (!vsi || test_bit(ICE_RESET_FAILED, pf->state))
>>   		return;
>>   
>>   	ice_vsi_release(vsi);
>

Regards,
Mateusz
_______________________________________________
Intel-wired-lan mailing list
Intel-wired-lan@osuosl.org
https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-09-20 12:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-09-08  8:59 [Intel-wired-lan] [PATCH iwl-net v2] ice: Fix call trace when rebuild fails Mateusz Palczewski
2023-09-12 22:05 ` Tony Nguyen
2023-09-20 12:19   ` Palczewski, Mateusz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox