* [PATCH net v4 0/2] iavf: Fix issues when setting channels concurrency with removing
@ 2023-05-03 3:15 Ding Hui
2023-05-03 3:15 ` [PATCH net v4 1/2] iavf: Fix use-after-free in free_netdev Ding Hui
2023-05-03 3:15 ` [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove Ding Hui
0 siblings, 2 replies; 11+ messages in thread
From: Ding Hui @ 2023-05-03 3:15 UTC (permalink / raw)
To: davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg,
anthony.l.nguyen
Cc: keescook, grzegorzx.szczurek, mateusz.palczewski,
mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher,
michal.kubiak, simon.horman, madhu.chittim, netdev, linux-kernel,
linux-hardening, pengdonglin, huangcun, Ding Hui
This patchset fixes two issues, both of which can be reproduced by the following script:
[root@host ~]# cat repro.sh
#!/bin/bash
# PCI address of the device whose sriov_numvfs sysfs attribute is toggled
# by do_set_numvf().
pf_dbsf="0000:41:00.0"
# PCI address of the device whose net/ sysfs directory do_set_channel()
# lists to find the interface name to reconfigure.
vf0_dbsf="0000:41:02.0"
# PIDs of the background loops started at the bottom of the script;
# on_exit() kills every PID recorded here.
g_pids=()
function do_set_numvf()
{
    # One create/destroy cycle: write 2 and then 0 to the PF's sriov_numvfs
    # attribute, pausing a random 1-3 seconds after each write.
    local sriov_ctl="/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs"
    local vf_count
    for vf_count in 2 0; do
        echo "$vf_count" >"$sriov_ctl"
        sleep $((RANDOM % 3 + 1))
    done
}
function do_set_channel()
{
    # Find the interface name listed under the VF's net/ sysfs directory.
    local nic
    nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/)

    # Nothing listed: back off for a random 0-2 seconds and report failure.
    if [ -z "$nic" ]; then
        sleep $((RANDOM%3))
        return 1
    fi

    # Assign an address and bring the interface up.
    ifconfig $nic 192.168.18.5 netmask 255.255.255.0
    ifconfig $nic up

    # Request 1 combined channel, then 4.
    local combined
    for combined in 1 4; do
        ethtool -L $nic combined $combined
    done

    sleep $((RANDOM%3))
}
function on_exit()
{
    # Kill every recorded background job that still exists, then clear the
    # list of recorded PIDs.
    local pid
    for pid in "${g_pids[@]}"; do
        # kill -0 only probes whether the PID can be signalled.
        if kill -0 "$pid" &>/dev/null; then
            kill "$pid" &>/dev/null
        fi
    done
    g_pids=()
}
# When the script exits, run on_exit to kill the background loops
# recorded in g_pids.
trap "on_exit; exit" EXIT
# Background loop 1: repeat do_set_numvf forever; remember its PID.
while :; do do_set_numvf ; done &
g_pids+=($!)
# Background loop 2: repeat do_set_channel forever, concurrently with
# loop 1; remember its PID.
while :; do do_set_channel ; done &
g_pids+=($!)
# Block until the background loops terminate.
wait
Ding Hui (2):
iavf: Fix use-after-free in free_netdev
iavf: Fix out-of-bounds when setting channels on remove
drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++-
drivers/net/ethernet/intel/iavf/iavf_main.c | 6 +-----
2 files changed, 4 insertions(+), 6 deletions(-)
--
2.17.1
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH net v4 1/2] iavf: Fix use-after-free in free_netdev 2023-05-03 3:15 [PATCH net v4 0/2] iavf: Fix issues when setting channels concurrency with removing Ding Hui @ 2023-05-03 3:15 ` Ding Hui 2023-05-03 3:15 ` [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove Ding Hui 1 sibling, 0 replies; 11+ messages in thread From: Ding Hui @ 2023-05-03 3:15 UTC (permalink / raw) To: davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen Cc: keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, madhu.chittim, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun, Ding Hui We do netif_napi_add() for all allocated q_vectors[], but potentially do netif_napi_del() for part of them, then kfree q_vectors and leave invalid pointers at dev->napi_list. Reproducer: [root@host ~]# cat repro.sh #!/bin/bash pf_dbsf="0000:41:00.0" vf0_dbsf="0000:41:02.0" g_pids=() function do_set_numvf() { echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) } function do_set_channel() { local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } ifconfig $nic 192.168.18.5 netmask 255.255.255.0 ifconfig $nic up ethtool -L $nic combined 1 ethtool -L $nic combined 4 sleep $((RANDOM%3)) } function on_exit() { local pid for pid in "${g_pids[@]}"; do kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null done g_pids=() } trap "on_exit; exit" EXIT while :; do do_set_numvf ; done & g_pids+=($!) while :; do do_set_channel ; done & g_pids+=($!) 
wait Result: [ 4093.900222] ================================================================== [ 4093.900230] BUG: KASAN: use-after-free in free_netdev+0x308/0x390 [ 4093.900232] Read of size 8 at addr ffff88b4dc145640 by task repro.sh/6699 [ 4093.900233] [ 4093.900236] CPU: 10 PID: 6699 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [ 4093.900238] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 [ 4093.900239] Call Trace: [ 4093.900244] dump_stack+0x71/0xab [ 4093.900249] print_address_description+0x6b/0x290 [ 4093.900251] ? free_netdev+0x308/0x390 [ 4093.900252] kasan_report+0x14a/0x2b0 [ 4093.900254] free_netdev+0x308/0x390 [ 4093.900261] iavf_remove+0x825/0xd20 [iavf] [ 4093.900265] pci_device_remove+0xa8/0x1f0 [ 4093.900268] device_release_driver_internal+0x1c6/0x460 [ 4093.900271] pci_stop_bus_device+0x101/0x150 [ 4093.900273] pci_stop_and_remove_bus_device+0xe/0x20 [ 4093.900275] pci_iov_remove_virtfn+0x187/0x420 [ 4093.900277] ? pci_iov_add_virtfn+0xe10/0xe10 [ 4093.900278] ? pci_get_subsys+0x90/0x90 [ 4093.900280] sriov_disable+0xed/0x3e0 [ 4093.900282] ? bus_find_device+0x12d/0x1a0 [ 4093.900290] i40e_free_vfs+0x754/0x1210 [i40e] [ 4093.900298] ? i40e_reset_all_vfs+0x880/0x880 [i40e] [ 4093.900299] ? pci_get_device+0x7c/0x90 [ 4093.900300] ? pci_get_subsys+0x90/0x90 [ 4093.900306] ? pci_vfs_assigned.part.7+0x144/0x210 [ 4093.900309] ? __mutex_lock_slowpath+0x10/0x10 [ 4093.900315] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] [ 4093.900318] sriov_numvfs_store+0x214/0x290 [ 4093.900320] ? sriov_totalvfs_show+0x30/0x30 [ 4093.900321] ? __mutex_lock_slowpath+0x10/0x10 [ 4093.900323] ? __check_object_size+0x15a/0x350 [ 4093.900326] kernfs_fop_write+0x280/0x3f0 [ 4093.900329] vfs_write+0x145/0x440 [ 4093.900330] ksys_write+0xab/0x160 [ 4093.900332] ? __ia32_sys_read+0xb0/0xb0 [ 4093.900334] ? fput_many+0x1a/0x120 [ 4093.900335] ? filp_close+0xf0/0x130 [ 4093.900338] do_syscall_64+0xa0/0x370 [ 4093.900339] ? 
page_fault+0x8/0x30 [ 4093.900341] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 4093.900357] RIP: 0033:0x7f16ad4d22c0 [ 4093.900359] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 [ 4093.900360] RSP: 002b:00007ffd6491b7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 4093.900362] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f16ad4d22c0 [ 4093.900363] RDX: 0000000000000002 RSI: 0000000001a41408 RDI: 0000000000000001 [ 4093.900364] RBP: 0000000001a41408 R08: 00007f16ad7a1780 R09: 00007f16ae1f2700 [ 4093.900364] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 [ 4093.900365] R13: 0000000000000001 R14: 00007f16ad7a0620 R15: 0000000000000001 [ 4093.900367] [ 4093.900368] Allocated by task 820: [ 4093.900371] kasan_kmalloc+0xa6/0xd0 [ 4093.900373] __kmalloc+0xfb/0x200 [ 4093.900376] iavf_init_interrupt_scheme+0x63b/0x1320 [iavf] [ 4093.900380] iavf_watchdog_task+0x3d51/0x52c0 [iavf] [ 4093.900382] process_one_work+0x56a/0x11f0 [ 4093.900383] worker_thread+0x8f/0xf40 [ 4093.900384] kthread+0x2a0/0x390 [ 4093.900385] ret_from_fork+0x1f/0x40 [ 4093.900387] 0xffffffffffffffff [ 4093.900387] [ 4093.900388] Freed by task 6699: [ 4093.900390] __kasan_slab_free+0x137/0x190 [ 4093.900391] kfree+0x8b/0x1b0 [ 4093.900394] iavf_free_q_vectors+0x11d/0x1a0 [iavf] [ 4093.900397] iavf_remove+0x35a/0xd20 [iavf] [ 4093.900399] pci_device_remove+0xa8/0x1f0 [ 4093.900400] device_release_driver_internal+0x1c6/0x460 [ 4093.900401] pci_stop_bus_device+0x101/0x150 [ 4093.900402] pci_stop_and_remove_bus_device+0xe/0x20 [ 4093.900403] pci_iov_remove_virtfn+0x187/0x420 [ 4093.900404] sriov_disable+0xed/0x3e0 [ 4093.900409] i40e_free_vfs+0x754/0x1210 [i40e] [ 4093.900415] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] [ 4093.900416] sriov_numvfs_store+0x214/0x290 [ 4093.900417] kernfs_fop_write+0x280/0x3f0 [ 4093.900418] 
vfs_write+0x145/0x440 [ 4093.900419] ksys_write+0xab/0x160 [ 4093.900420] do_syscall_64+0xa0/0x370 [ 4093.900421] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 4093.900422] 0xffffffffffffffff [ 4093.900422] [ 4093.900424] The buggy address belongs to the object at ffff88b4dc144200 which belongs to the cache kmalloc-8k of size 8192 [ 4093.900425] The buggy address is located 5184 bytes inside of 8192-byte region [ffff88b4dc144200, ffff88b4dc146200) [ 4093.900425] The buggy address belongs to the page: [ 4093.900427] page:ffffea00d3705000 refcount:1 mapcount:0 mapping:ffff88bf04415c80 index:0x0 compound_mapcount: 0 [ 4093.900430] flags: 0x10000000008100(slab|head) [ 4093.900433] raw: 0010000000008100 dead000000000100 dead000000000200 ffff88bf04415c80 [ 4093.900434] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000 [ 4093.900434] page dumped because: kasan: bad access detected [ 4093.900435] [ 4093.900435] Memory state around the buggy address: [ 4093.900436] ffff88b4dc145500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900437] ffff88b4dc145580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900438] >ffff88b4dc145600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900438] ^ [ 4093.900439] ffff88b4dc145680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900440] ffff88b4dc145700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900440] ================================================================== Although the patch #2 (of 2) can avoid the issuse triggered by this repro.sh, there still are other potential risks that if num_active_queues is changed to less than allocated q_vectors[] by unexpected, the mismatched netif_napi_add/del() can also cause UAF. Since we actually call netif_napi_add() for all allocated q_vectors unconditionally in iavf_alloc_q_vectors(), so we should fix it by letting netif_napi_del() match to netif_napi_add(). 
Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> Cc: Donglin Peng <pengdonglin@sangfor.com.cn> Cc: Huang Cun <huangcun@sangfor.com.cn> Reviewed-by: Simon Horman <simon.horman@corigine.com> Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> Reviewed-by: Madhu Chittim <madhu.chittim@intel.com> --- v3 to v4: - fix typo in commit message v2 to v3: - fix review tag v1 to v2: - add Fixes: tag - add reproduction script - update commit message --- drivers/net/ethernet/intel/iavf/iavf_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 095201e83c9d..a57e3425f960 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1849,19 +1849,15 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) static void iavf_free_q_vectors(struct iavf_adapter *adapter) { int q_idx, num_q_vectors; - int napi_vectors; if (!adapter->q_vectors) return; num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; - napi_vectors = adapter->num_active_queues; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx]; - - if (q_idx < napi_vectors) - netif_napi_del(&q_vector->napi); + netif_napi_del(&q_vector->napi); } kfree(adapter->q_vectors); adapter->q_vectors = NULL; -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 3:15 [PATCH net v4 0/2] iavf: Fix issues when setting channels concurrency with removing Ding Hui 2023-05-03 3:15 ` [PATCH net v4 1/2] iavf: Fix use-after-free in free_netdev Ding Hui @ 2023-05-03 3:15 ` Ding Hui 2023-05-03 8:24 ` Leon Romanovsky 2023-05-03 19:26 ` [Intel-wired-lan] " Ahmed Zaki 1 sibling, 2 replies; 11+ messages in thread From: Ding Hui @ 2023-05-03 3:15 UTC (permalink / raw) To: davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen Cc: keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, madhu.chittim, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun, Ding Hui If we set channels greater when iavf_remove, the waiting reset done will be timeout, then returned with error but changed num_active_queues directly, that will lead to OOB like the following logs. Because the num_active_queues is greater than tx/rx_rings[] allocated actually. Reproducer: [root@host ~]# cat repro.sh #!/bin/bash pf_dbsf="0000:41:00.0" vf0_dbsf="0000:41:02.0" g_pids=() function do_set_numvf() { echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) } function do_set_channel() { local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } ifconfig $nic 192.168.18.5 netmask 255.255.255.0 ifconfig $nic up ethtool -L $nic combined 1 ethtool -L $nic combined 4 sleep $((RANDOM%3)) } function on_exit() { local pid for pid in "${g_pids[@]}"; do kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null done g_pids=() } trap "on_exit; exit" EXIT while :; do do_set_numvf ; done & g_pids+=($!) while :; do do_set_channel ; done & g_pids+=($!) 
wait Result: [ 3506.152887] iavf 0000:41:02.0: Removing device [ 3510.400799] ================================================================== [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] [ 3510.400823] Read of size 8 at addr ffff88b6f9311008 by task repro.sh/55536 [ 3510.400823] [ 3510.400830] CPU: 101 PID: 55536 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [ 3510.400832] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 [ 3510.400835] Call Trace: [ 3510.400851] dump_stack+0x71/0xab [ 3510.400860] print_address_description+0x6b/0x290 [ 3510.400865] ? iavf_free_all_tx_resources+0x156/0x160 [iavf] [ 3510.400868] kasan_report+0x14a/0x2b0 [ 3510.400873] iavf_free_all_tx_resources+0x156/0x160 [iavf] [ 3510.400880] iavf_remove+0x2b6/0xc70 [iavf] [ 3510.400884] ? iavf_free_all_rx_resources+0x160/0x160 [iavf] [ 3510.400891] ? wait_woken+0x1d0/0x1d0 [ 3510.400895] ? notifier_call_chain+0xc1/0x130 [ 3510.400903] pci_device_remove+0xa8/0x1f0 [ 3510.400910] device_release_driver_internal+0x1c6/0x460 [ 3510.400916] pci_stop_bus_device+0x101/0x150 [ 3510.400919] pci_stop_and_remove_bus_device+0xe/0x20 [ 3510.400924] pci_iov_remove_virtfn+0x187/0x420 [ 3510.400927] ? pci_iov_add_virtfn+0xe10/0xe10 [ 3510.400929] ? pci_get_subsys+0x90/0x90 [ 3510.400932] sriov_disable+0xed/0x3e0 [ 3510.400936] ? bus_find_device+0x12d/0x1a0 [ 3510.400953] i40e_free_vfs+0x754/0x1210 [i40e] [ 3510.400966] ? i40e_reset_all_vfs+0x880/0x880 [i40e] [ 3510.400968] ? pci_get_device+0x7c/0x90 [ 3510.400970] ? pci_get_subsys+0x90/0x90 [ 3510.400982] ? pci_vfs_assigned.part.7+0x144/0x210 [ 3510.400987] ? __mutex_lock_slowpath+0x10/0x10 [ 3510.400996] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] [ 3510.401001] sriov_numvfs_store+0x214/0x290 [ 3510.401005] ? sriov_totalvfs_show+0x30/0x30 [ 3510.401007] ? __mutex_lock_slowpath+0x10/0x10 [ 3510.401011] ? 
__check_object_size+0x15a/0x350 [ 3510.401018] kernfs_fop_write+0x280/0x3f0 [ 3510.401022] vfs_write+0x145/0x440 [ 3510.401025] ksys_write+0xab/0x160 [ 3510.401028] ? __ia32_sys_read+0xb0/0xb0 [ 3510.401031] ? fput_many+0x1a/0x120 [ 3510.401032] ? filp_close+0xf0/0x130 [ 3510.401038] do_syscall_64+0xa0/0x370 [ 3510.401041] ? page_fault+0x8/0x30 [ 3510.401043] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 3510.401073] RIP: 0033:0x7f3a9bb842c0 [ 3510.401079] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 [ 3510.401080] RSP: 002b:00007ffc05f1fe18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 3510.401083] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f3a9bb842c0 [ 3510.401085] RDX: 0000000000000002 RSI: 0000000002327408 RDI: 0000000000000001 [ 3510.401086] RBP: 0000000002327408 R08: 00007f3a9be53780 R09: 00007f3a9c8a4700 [ 3510.401086] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 [ 3510.401087] R13: 0000000000000001 R14: 00007f3a9be52620 R15: 0000000000000001 [ 3510.401090] [ 3510.401093] Allocated by task 76795: [ 3510.401098] kasan_kmalloc+0xa6/0xd0 [ 3510.401099] __kmalloc+0xfb/0x200 [ 3510.401104] iavf_init_interrupt_scheme+0x26f/0x1310 [iavf] [ 3510.401108] iavf_watchdog_task+0x1d58/0x4050 [iavf] [ 3510.401114] process_one_work+0x56a/0x11f0 [ 3510.401115] worker_thread+0x8f/0xf40 [ 3510.401117] kthread+0x2a0/0x390 [ 3510.401119] ret_from_fork+0x1f/0x40 [ 3510.401122] 0xffffffffffffffff [ 3510.401123] If we detected removing is in processing, we can avoid unnecessary waiting and return error faster. On the other hand in timeout handling, we should keep the original num_active_queues and reset num_req_queues to 0. 
Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> Cc: Donglin Peng <pengdonglin@sangfor.com.cn> Cc: Huang Cun <huangcun@sangfor.com.cn> Reviewed-by: Simon Horman <simon.horman@corigine.com> Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> --- v3 to v4: - nothing changed v2 to v3: - fix review tag v1 to v2: - add reproduction script --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 6f171d1d85b7..d8a3c0cfedd0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, /* wait for the reset is done */ for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { msleep(IAVF_RESET_WAIT_MS); + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + return -EOPNOTSUPP; if (adapter->flags & IAVF_FLAG_RESET_PENDING) continue; break; } if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - adapter->num_active_queues = num_req; + adapter->num_req_queues = 0; return -EOPNOTSUPP; } -- 2.17.1 ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 3:15 ` [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove Ding Hui @ 2023-05-03 8:24 ` Leon Romanovsky 2023-05-03 14:00 ` Ding Hui 2023-05-04 2:59 ` Jakub Kicinski 2023-05-03 19:26 ` [Intel-wired-lan] " Ahmed Zaki 1 sibling, 2 replies; 11+ messages in thread From: Leon Romanovsky @ 2023-05-03 8:24 UTC (permalink / raw) To: Ding Hui Cc: davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen, keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, madhu.chittim, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun On Wed, May 03, 2023 at 11:15:41AM +0800, Ding Hui wrote: > If we set channels greater when iavf_remove, the waiting reset done > will be timeout, then returned with error but changed num_active_queues > directly, that will lead to OOB like the following logs. Because the > num_active_queues is greater than tx/rx_rings[] allocated actually. > > Reproducer: > > [root@host ~]# cat repro.sh > #!/bin/bash > > pf_dbsf="0000:41:00.0" > vf0_dbsf="0000:41:02.0" > g_pids=() > > function do_set_numvf() > { > echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs > sleep $((RANDOM%3+1)) > echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs > sleep $((RANDOM%3+1)) > } > > function do_set_channel() > { > local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) > [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } > ifconfig $nic 192.168.18.5 netmask 255.255.255.0 > ifconfig $nic up > ethtool -L $nic combined 1 > ethtool -L $nic combined 4 > sleep $((RANDOM%3)) > } > > function on_exit() > { > local pid > for pid in "${g_pids[@]}"; do > kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null > done > g_pids=() > } > > trap "on_exit; exit" EXIT > > while :; do do_set_numvf ; done & > g_pids+=($!) 
> while :; do do_set_channel ; done & > g_pids+=($!) > > wait > > Result: > > [ 3506.152887] iavf 0000:41:02.0: Removing device > [ 3510.400799] ================================================================== > [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] > [ 3510.400823] Read of size 8 at addr ffff88b6f9311008 by task repro.sh/55536 > [ 3510.400823] > [ 3510.400830] CPU: 101 PID: 55536 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 > [ 3510.400832] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 > [ 3510.400835] Call Trace: > [ 3510.400851] dump_stack+0x71/0xab > [ 3510.400860] print_address_description+0x6b/0x290 > [ 3510.400865] ? iavf_free_all_tx_resources+0x156/0x160 [iavf] > [ 3510.400868] kasan_report+0x14a/0x2b0 > [ 3510.400873] iavf_free_all_tx_resources+0x156/0x160 [iavf] > [ 3510.400880] iavf_remove+0x2b6/0xc70 [iavf] > [ 3510.400884] ? iavf_free_all_rx_resources+0x160/0x160 [iavf] > [ 3510.400891] ? wait_woken+0x1d0/0x1d0 > [ 3510.400895] ? notifier_call_chain+0xc1/0x130 > [ 3510.400903] pci_device_remove+0xa8/0x1f0 > [ 3510.400910] device_release_driver_internal+0x1c6/0x460 > [ 3510.400916] pci_stop_bus_device+0x101/0x150 > [ 3510.400919] pci_stop_and_remove_bus_device+0xe/0x20 > [ 3510.400924] pci_iov_remove_virtfn+0x187/0x420 > [ 3510.400927] ? pci_iov_add_virtfn+0xe10/0xe10 > [ 3510.400929] ? pci_get_subsys+0x90/0x90 > [ 3510.400932] sriov_disable+0xed/0x3e0 > [ 3510.400936] ? bus_find_device+0x12d/0x1a0 > [ 3510.400953] i40e_free_vfs+0x754/0x1210 [i40e] > [ 3510.400966] ? i40e_reset_all_vfs+0x880/0x880 [i40e] > [ 3510.400968] ? pci_get_device+0x7c/0x90 > [ 3510.400970] ? pci_get_subsys+0x90/0x90 > [ 3510.400982] ? pci_vfs_assigned.part.7+0x144/0x210 > [ 3510.400987] ? __mutex_lock_slowpath+0x10/0x10 > [ 3510.400996] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] > [ 3510.401001] sriov_numvfs_store+0x214/0x290 > [ 3510.401005] ? 
sriov_totalvfs_show+0x30/0x30 > [ 3510.401007] ? __mutex_lock_slowpath+0x10/0x10 > [ 3510.401011] ? __check_object_size+0x15a/0x350 > [ 3510.401018] kernfs_fop_write+0x280/0x3f0 > [ 3510.401022] vfs_write+0x145/0x440 > [ 3510.401025] ksys_write+0xab/0x160 > [ 3510.401028] ? __ia32_sys_read+0xb0/0xb0 > [ 3510.401031] ? fput_many+0x1a/0x120 > [ 3510.401032] ? filp_close+0xf0/0x130 > [ 3510.401038] do_syscall_64+0xa0/0x370 > [ 3510.401041] ? page_fault+0x8/0x30 > [ 3510.401043] entry_SYSCALL_64_after_hwframe+0x65/0xca > [ 3510.401073] RIP: 0033:0x7f3a9bb842c0 > [ 3510.401079] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 > [ 3510.401080] RSP: 002b:00007ffc05f1fe18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 > [ 3510.401083] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f3a9bb842c0 > [ 3510.401085] RDX: 0000000000000002 RSI: 0000000002327408 RDI: 0000000000000001 > [ 3510.401086] RBP: 0000000002327408 R08: 00007f3a9be53780 R09: 00007f3a9c8a4700 > [ 3510.401086] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 > [ 3510.401087] R13: 0000000000000001 R14: 00007f3a9be52620 R15: 0000000000000001 > [ 3510.401090] > [ 3510.401093] Allocated by task 76795: > [ 3510.401098] kasan_kmalloc+0xa6/0xd0 > [ 3510.401099] __kmalloc+0xfb/0x200 > [ 3510.401104] iavf_init_interrupt_scheme+0x26f/0x1310 [iavf] > [ 3510.401108] iavf_watchdog_task+0x1d58/0x4050 [iavf] > [ 3510.401114] process_one_work+0x56a/0x11f0 > [ 3510.401115] worker_thread+0x8f/0xf40 > [ 3510.401117] kthread+0x2a0/0x390 > [ 3510.401119] ret_from_fork+0x1f/0x40 > [ 3510.401122] 0xffffffffffffffff > [ 3510.401123] > > If we detected removing is in processing, we can avoid unnecessary > waiting and return error faster. > > On the other hand in timeout handling, we should keep the original > num_active_queues and reset num_req_queues to 0. 
> > Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") > Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> > Cc: Donglin Peng <pengdonglin@sangfor.com.cn> > Cc: Huang Cun <huangcun@sangfor.com.cn> > Reviewed-by: Simon Horman <simon.horman@corigine.com> > Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> > --- > v3 to v4: > - nothing changed > > v2 to v3: > - fix review tag > > v1 to v2: > - add reproduction script > > --- > drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > index 6f171d1d85b7..d8a3c0cfedd0 100644 > --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, > /* wait for the reset is done */ > for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { > msleep(IAVF_RESET_WAIT_MS); > + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) > + return -EOPNOTSUPP; This makes no sense without locking as change to __IAVF_IN_REMOVE_TASK can happen any time. Thanks > if (adapter->flags & IAVF_FLAG_RESET_PENDING) > continue; > break; > } > if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { > adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; > - adapter->num_active_queues = num_req; > + adapter->num_req_queues = 0; > return -EOPNOTSUPP; > } > > -- > 2.17.1 > > ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 8:24 ` Leon Romanovsky @ 2023-05-03 14:00 ` Ding Hui 2023-05-03 16:29 ` Leon Romanovsky 2023-05-04 2:59 ` Jakub Kicinski 1 sibling, 1 reply; 11+ messages in thread From: Ding Hui @ 2023-05-03 14:00 UTC (permalink / raw) To: Leon Romanovsky Cc: dinghui, davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen, keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, madhu.chittim, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun On 2023/5/3 4:24 下午, Leon Romanovsky wrote: > On Wed, May 03, 2023 at 11:15:41AM +0800, Ding Hui wrote: >> >> If we detected removing is in processing, we can avoid unnecessary >> waiting and return error faster. >> >> On the other hand in timeout handling, we should keep the original >> num_active_queues and reset num_req_queues to 0. >> >> Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") >> Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> >> Cc: Donglin Peng <pengdonglin@sangfor.com.cn> >> Cc: Huang Cun <huangcun@sangfor.com.cn> >> Reviewed-by: Simon Horman <simon.horman@corigine.com> >> Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> >> --- >> v3 to v4: >> - nothing changed >> >> v2 to v3: >> - fix review tag >> >> v1 to v2: >> - add reproduction script >> >> --- >> drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- >> 1 file changed, 3 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >> index 6f171d1d85b7..d8a3c0cfedd0 100644 >> --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >> +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >> @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, >> /* wait for the reset is done */ >> for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { >> 
msleep(IAVF_RESET_WAIT_MS); >> + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) >> + return -EOPNOTSUPP; > > This makes no sense without locking as change to __IAVF_IN_REMOVE_TASK > can happen any time. > The state doesn't need to be that precise here, it is optimized only for the fast path. During the lifecycle of the adapter, the __IAVF_IN_REMOVE_TASK state will only be set and not cleared. If we didn't detect the "removing" state, we also can fallback to timeout handling. So I don't think the locking is necessary here, what do the maintainers at Intel think? > Thanks > >> if (adapter->flags & IAVF_FLAG_RESET_PENDING) >> continue; >> break; >> } >> if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { >> adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; >> - adapter->num_active_queues = num_req; >> + adapter->num_req_queues = 0; >> return -EOPNOTSUPP; >> } >> >> -- >> 2.17.1 >> >> > -- Thanks, -dinghui ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 14:00 ` Ding Hui @ 2023-05-03 16:29 ` Leon Romanovsky 2023-05-03 19:22 ` Chittim, Madhu 0 siblings, 1 reply; 11+ messages in thread From: Leon Romanovsky @ 2023-05-03 16:29 UTC (permalink / raw) To: Ding Hui Cc: davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen, keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, madhu.chittim, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun On Wed, May 03, 2023 at 10:00:49PM +0800, Ding Hui wrote: > On 2023/5/3 4:24 下午, Leon Romanovsky wrote: > > On Wed, May 03, 2023 at 11:15:41AM +0800, Ding Hui wrote: > > > > > > > If we detected removing is in processing, we can avoid unnecessary > > > waiting and return error faster. > > > > > > On the other hand in timeout handling, we should keep the original > > > num_active_queues and reset num_req_queues to 0. 
> > > > > > Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") > > > Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> > > > Cc: Donglin Peng <pengdonglin@sangfor.com.cn> > > > Cc: Huang Cun <huangcun@sangfor.com.cn> > > > Reviewed-by: Simon Horman <simon.horman@corigine.com> > > > Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> > > > --- > > > v3 to v4: > > > - nothing changed > > > > > > v2 to v3: > > > - fix review tag > > > > > > v1 to v2: > > > - add reproduction script > > > > > > --- > > > drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- > > > 1 file changed, 3 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > > > index 6f171d1d85b7..d8a3c0cfedd0 100644 > > > --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > > > +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > > > @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, > > > /* wait for the reset is done */ > > > for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { > > > msleep(IAVF_RESET_WAIT_MS); > > > + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) > > > + return -EOPNOTSUPP; > > > > This makes no sense without locking as change to __IAVF_IN_REMOVE_TASK > > can happen any time. > > > > The state doesn't need to be that precise here, it is optimized only for > the fast path. During the lifecycle of the adapter, the __IAVF_IN_REMOVE_TASK > state will only be set and not cleared. > > If we didn't detect the "removing" state, we also can fallback to timeout > handling. > > So I don't think the locking is necessary here, what do the maintainers > at Intel think? I'm not Intel maintainer, but your change, explanation and the following line from your commit message aren't really aligned. 
[ 3510.400799] ================================================================== [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] > > > Thanks > > > > > if (adapter->flags & IAVF_FLAG_RESET_PENDING) > > > continue; > > > break; > > > } > > > if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { > > > adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; > > > - adapter->num_active_queues = num_req; > > > + adapter->num_req_queues = 0; > > > return -EOPNOTSUPP; > > > } > > > -- > > > 2.17.1 > > > > > > > > > > -- > Thanks, > -dinghui > > ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 16:29 ` Leon Romanovsky @ 2023-05-03 19:22 ` Chittim, Madhu 2023-05-04 7:57 ` Leon Romanovsky 0 siblings, 1 reply; 11+ messages in thread From: Chittim, Madhu @ 2023-05-03 19:22 UTC (permalink / raw) To: Leon Romanovsky, Ding Hui Cc: davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen, keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun On 5/3/2023 9:29 AM, Leon Romanovsky wrote: > On Wed, May 03, 2023 at 10:00:49PM +0800, Ding Hui wrote: >> On 2023/5/3 4:24 下午, Leon Romanovsky wrote: >>> On Wed, May 03, 2023 at 11:15:41AM +0800, Ding Hui wrote: >> >>>> >>>> If we detected removing is in processing, we can avoid unnecessary >>>> waiting and return error faster. >>>> >>>> On the other hand in timeout handling, we should keep the original >>>> num_active_queues and reset num_req_queues to 0. 
>>>> >>>> Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") >>>> Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> >>>> Cc: Donglin Peng <pengdonglin@sangfor.com.cn> >>>> Cc: Huang Cun <huangcun@sangfor.com.cn> >>>> Reviewed-by: Simon Horman <simon.horman@corigine.com> >>>> Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> >>>> --- >>>> v3 to v4: >>>> - nothing changed >>>> >>>> v2 to v3: >>>> - fix review tag >>>> >>>> v1 to v2: >>>> - add reproduction script >>>> >>>> --- >>>> drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- >>>> 1 file changed, 3 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >>>> index 6f171d1d85b7..d8a3c0cfedd0 100644 >>>> --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >>>> +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >>>> @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, >>>> /* wait for the reset is done */ >>>> for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { >>>> msleep(IAVF_RESET_WAIT_MS); >>>> + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) >>>> + return -EOPNOTSUPP; >>> >>> This makes no sense without locking as change to __IAVF_IN_REMOVE_TASK >>> can happen any time. >>> >> >> The state doesn't need to be that precise here, it is optimized only for >> the fast path. During the lifecycle of the adapter, the __IAVF_IN_REMOVE_TASK >> state will only be set and not cleared. >> >> If we didn't detect the "removing" state, we also can fallback to timeout >> handling. >> >> So I don't think the locking is necessary here, what do the maintainers >> at Intel think? > > I'm not Intel maintainer, but your change, explanation and the following > line from your commit message aren't really aligned. 
> > [ 3510.400799] ================================================================== > [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] > > __IAVF_IN_REMOVE_TASK is being set only in iavf_remove() and the above change is ok in terms of coming out of setting channels early enough while remove is in progress. Reviewed-by: madhu.chittim@intel.com >> >>> Thanks >>> >>>> if (adapter->flags & IAVF_FLAG_RESET_PENDING) >>>> continue; >>>> break; >>>> } >>>> if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { >>>> adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; >>>> - adapter->num_active_queues = num_req; >>>> + adapter->num_req_queues = 0; >>>> return -EOPNOTSUPP; >>>> } >>>> -- >>>> 2.17.1 >>>> >>>> >>> >> >> -- >> Thanks, >> -dinghui >> >> ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 19:22 ` Chittim, Madhu @ 2023-05-04 7:57 ` Leon Romanovsky 2023-05-08 12:34 ` Ding Hui 0 siblings, 1 reply; 11+ messages in thread From: Leon Romanovsky @ 2023-05-04 7:57 UTC (permalink / raw) To: Chittim, Madhu Cc: Ding Hui, davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen, keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun On Wed, May 03, 2023 at 12:22:00PM -0700, Chittim, Madhu wrote: > > > On 5/3/2023 9:29 AM, Leon Romanovsky wrote: > > On Wed, May 03, 2023 at 10:00:49PM +0800, Ding Hui wrote: > > > On 2023/5/3 4:24 下午, Leon Romanovsky wrote: > > > > On Wed, May 03, 2023 at 11:15:41AM +0800, Ding Hui wrote: > > > > > > > > > > > > > If we detected removing is in processing, we can avoid unnecessary > > > > > waiting and return error faster. > > > > > > > > > > On the other hand in timeout handling, we should keep the original > > > > > num_active_queues and reset num_req_queues to 0. 
> > > > > > > > > > Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") > > > > > Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> > > > > > Cc: Donglin Peng <pengdonglin@sangfor.com.cn> > > > > > Cc: Huang Cun <huangcun@sangfor.com.cn> > > > > > Reviewed-by: Simon Horman <simon.horman@corigine.com> > > > > > Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> > > > > > --- > > > > > v3 to v4: > > > > > - nothing changed > > > > > > > > > > v2 to v3: > > > > > - fix review tag > > > > > > > > > > v1 to v2: > > > > > - add reproduction script > > > > > > > > > > --- > > > > > drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- > > > > > 1 file changed, 3 insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > > > > > index 6f171d1d85b7..d8a3c0cfedd0 100644 > > > > > --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > > > > > +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > > > > > @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, > > > > > /* wait for the reset is done */ > > > > > for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { > > > > > msleep(IAVF_RESET_WAIT_MS); > > > > > + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) > > > > > + return -EOPNOTSUPP; > > > > > > > > This makes no sense without locking as change to __IAVF_IN_REMOVE_TASK > > > > can happen any time. > > > > > > > > > > The state doesn't need to be that precise here, it is optimized only for > > > the fast path. During the lifecycle of the adapter, the __IAVF_IN_REMOVE_TASK > > > state will only be set and not cleared. > > > > > > If we didn't detect the "removing" state, we also can fallback to timeout > > > handling. > > > > > > So I don't think the locking is necessary here, what do the maintainers > > > at Intel think? 
> > > > I'm not Intel maintainer, but your change, explanation and the following > > line from your commit message aren't really aligned. > > > > [ 3510.400799] ================================================================== > > [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] > > > > > > __IAVF_IN_REMOVE_TASK is being set only in iavf_remove() and the above > change is ok in terms of coming out of setting channels early enough while > remove is in progress. It is not, __IAVF_IN_REMOVE_TASK, set bit can be changed any time during iavf_set_channels() and if it is not, I would expect test_bit(..) placed at the beginning of iavf_set_channels() or even earlier. Thanks ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-04 7:57 ` Leon Romanovsky @ 2023-05-08 12:34 ` Ding Hui 0 siblings, 0 replies; 11+ messages in thread From: Ding Hui @ 2023-05-08 12:34 UTC (permalink / raw) To: Leon Romanovsky, Chittim, Madhu Cc: davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen, keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun On 2023/5/4 15:57, Leon Romanovsky wrote: > On Wed, May 03, 2023 at 12:22:00PM -0700, Chittim, Madhu wrote: >> >> >> On 5/3/2023 9:29 AM, Leon Romanovsky wrote: >>> On Wed, May 03, 2023 at 10:00:49PM +0800, Ding Hui wrote: >>>> On 2023/5/3 4:24 下午, Leon Romanovsky wrote: >>>>> On Wed, May 03, 2023 at 11:15:41AM +0800, Ding Hui wrote: >>>> >>>>>> >>>>>> If we detected removing is in processing, we can avoid unnecessary >>>>>> waiting and return error faster. >>>>>> >>>>>> On the other hand in timeout handling, we should keep the original >>>>>> num_active_queues and reset num_req_queues to 0. 
>>>>>> >>>>>> Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") >>>>>> Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> >>>>>> Cc: Donglin Peng <pengdonglin@sangfor.com.cn> >>>>>> Cc: Huang Cun <huangcun@sangfor.com.cn> >>>>>> Reviewed-by: Simon Horman <simon.horman@corigine.com> >>>>>> Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> >>>>>> --- >>>>>> v3 to v4: >>>>>> - nothing changed >>>>>> >>>>>> v2 to v3: >>>>>> - fix review tag >>>>>> >>>>>> v1 to v2: >>>>>> - add reproduction script >>>>>> >>>>>> --- >>>>>> drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- >>>>>> 1 file changed, 3 insertions(+), 1 deletion(-) >>>>>> >>>>>> diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >>>>>> index 6f171d1d85b7..d8a3c0cfedd0 100644 >>>>>> --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >>>>>> +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c >>>>>> @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, >>>>>> /* wait for the reset is done */ >>>>>> for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { >>>>>> msleep(IAVF_RESET_WAIT_MS); >>>>>> + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) >>>>>> + return -EOPNOTSUPP; >>>>> >>>>> This makes no sense without locking as change to __IAVF_IN_REMOVE_TASK >>>>> can happen any time. >>>>> >>>> >>>> The state doesn't need to be that precise here, it is optimized only for >>>> the fast path. During the lifecycle of the adapter, the __IAVF_IN_REMOVE_TASK >>>> state will only be set and not cleared. >>>> >>>> If we didn't detect the "removing" state, we also can fallback to timeout >>>> handling. >>>> >>>> So I don't think the locking is necessary here, what do the maintainers >>>> at Intel think? >>> >>> I'm not Intel maintainer, but your change, explanation and the following >>> line from your commit message aren't really aligned. 
>>> >>> [ 3510.400799] ================================================================== >>> [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] >>> >>> >> >> __IAVF_IN_REMOVE_TASK is being set only in iavf_remove() and the above >> change is ok in terms of coming out of setting channels early enough while >> remove is in progress. > > It is not, __IAVF_IN_REMOVE_TASK, set bit can be changed any time during > iavf_set_channels() and if it is not, I would expect test_bit(..) placed > at the beginning of iavf_set_channels() or even earlier. > Since we have a little dispute on __IAVF_IN_REMOVE_TASK, I'll remove the test_bit() in v5, and remove Reviewed-by: tags of 2/2 to review again. -- Thanks, - Ding Hui ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 8:24 ` Leon Romanovsky 2023-05-03 14:00 ` Ding Hui @ 2023-05-04 2:59 ` Jakub Kicinski 1 sibling, 0 replies; 11+ messages in thread From: Jakub Kicinski @ 2023-05-04 2:59 UTC (permalink / raw) To: Leon Romanovsky Cc: Ding Hui, davem, edumazet, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen, keescook, grzegorzx.szczurek, mateusz.palczewski, mitch.a.williams, gregory.v.rose, jeffrey.t.kirsher, michal.kubiak, simon.horman, madhu.chittim, netdev, linux-kernel, linux-hardening, pengdonglin, huangcun On Wed, 3 May 2023 11:24:58 +0300 Leon Romanovsky wrote: > > + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) > > + return -EOPNOTSUPP; > > This makes no sense without locking as change to __IAVF_IN_REMOVE_TASK > can happen any time. +1, the changes look questionable to me as well. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Intel-wired-lan] [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove 2023-05-03 3:15 ` [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove Ding Hui 2023-05-03 8:24 ` Leon Romanovsky @ 2023-05-03 19:26 ` Ahmed Zaki 1 sibling, 0 replies; 11+ messages in thread From: Ahmed Zaki @ 2023-05-03 19:26 UTC (permalink / raw) To: Ding Hui, davem, edumazet, kuba, pabeni, intel-wired-lan, jesse.brandeburg, anthony.l.nguyen Cc: keescook, grzegorzx.szczurek, mitch.a.williams, linux-kernel, huangcun, gregory.v.rose, michal.kubiak, jeffrey.t.kirsher, simon.horman, pengdonglin, netdev, linux-hardening On 2023-05-02 21:15, Ding Hui wrote: > If we set channels greater when iavf_remove, the waiting reset done > will be timeout, then returned with error but changed num_active_queues > directly, that will lead to OOB like the following logs. Because the > num_active_queues is greater than tx/rx_rings[] allocated actually. > > Reproducer: > > [root@host ~]# cat repro.sh > #!/bin/bash > > pf_dbsf="0000:41:00.0" > vf0_dbsf="0000:41:02.0" > g_pids=() > > function do_set_numvf() > { > echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs > sleep $((RANDOM%3+1)) > echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs > sleep $((RANDOM%3+1)) > } > > function do_set_channel() > { > local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) > [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } > ifconfig $nic 192.168.18.5 netmask 255.255.255.0 > ifconfig $nic up > ethtool -L $nic combined 1 > ethtool -L $nic combined 4 > sleep $((RANDOM%3)) > } > > function on_exit() > { > local pid > for pid in "${g_pids[@]}"; do > kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null > done > g_pids=() > } > > trap "on_exit; exit" EXIT > > while :; do do_set_numvf ; done & > g_pids+=($!) > while :; do do_set_channel ; done & > g_pids+=($!) 
> > wait > > Result: > > [ 3506.152887] iavf 0000:41:02.0: Removing device > [ 3510.400799] ================================================================== > [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] > [ 3510.400823] Read of size 8 at addr ffff88b6f9311008 by task repro.sh/55536 > [ 3510.400823] > [ 3510.400830] CPU: 101 PID: 55536 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 > [ 3510.400832] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 > [ 3510.400835] Call Trace: > [ 3510.400851] dump_stack+0x71/0xab > [ 3510.400860] print_address_description+0x6b/0x290 > [ 3510.400865] ? iavf_free_all_tx_resources+0x156/0x160 [iavf] > [ 3510.400868] kasan_report+0x14a/0x2b0 > [ 3510.400873] iavf_free_all_tx_resources+0x156/0x160 [iavf] > [ 3510.400880] iavf_remove+0x2b6/0xc70 [iavf] > [ 3510.400884] ? iavf_free_all_rx_resources+0x160/0x160 [iavf] > [ 3510.400891] ? wait_woken+0x1d0/0x1d0 > [ 3510.400895] ? notifier_call_chain+0xc1/0x130 > [ 3510.400903] pci_device_remove+0xa8/0x1f0 > [ 3510.400910] device_release_driver_internal+0x1c6/0x460 > [ 3510.400916] pci_stop_bus_device+0x101/0x150 > [ 3510.400919] pci_stop_and_remove_bus_device+0xe/0x20 > [ 3510.400924] pci_iov_remove_virtfn+0x187/0x420 > [ 3510.400927] ? pci_iov_add_virtfn+0xe10/0xe10 > [ 3510.400929] ? pci_get_subsys+0x90/0x90 > [ 3510.400932] sriov_disable+0xed/0x3e0 > [ 3510.400936] ? bus_find_device+0x12d/0x1a0 > [ 3510.400953] i40e_free_vfs+0x754/0x1210 [i40e] > [ 3510.400966] ? i40e_reset_all_vfs+0x880/0x880 [i40e] > [ 3510.400968] ? pci_get_device+0x7c/0x90 > [ 3510.400970] ? pci_get_subsys+0x90/0x90 > [ 3510.400982] ? pci_vfs_assigned.part.7+0x144/0x210 > [ 3510.400987] ? __mutex_lock_slowpath+0x10/0x10 > [ 3510.400996] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] > [ 3510.401001] sriov_numvfs_store+0x214/0x290 > [ 3510.401005] ? sriov_totalvfs_show+0x30/0x30 > [ 3510.401007] ? 
__mutex_lock_slowpath+0x10/0x10 > [ 3510.401011] ? __check_object_size+0x15a/0x350 > [ 3510.401018] kernfs_fop_write+0x280/0x3f0 > [ 3510.401022] vfs_write+0x145/0x440 > [ 3510.401025] ksys_write+0xab/0x160 > [ 3510.401028] ? __ia32_sys_read+0xb0/0xb0 > [ 3510.401031] ? fput_many+0x1a/0x120 > [ 3510.401032] ? filp_close+0xf0/0x130 > [ 3510.401038] do_syscall_64+0xa0/0x370 > [ 3510.401041] ? page_fault+0x8/0x30 > [ 3510.401043] entry_SYSCALL_64_after_hwframe+0x65/0xca > [ 3510.401073] RIP: 0033:0x7f3a9bb842c0 > [ 3510.401079] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 > [ 3510.401080] RSP: 002b:00007ffc05f1fe18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 > [ 3510.401083] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f3a9bb842c0 > [ 3510.401085] RDX: 0000000000000002 RSI: 0000000002327408 RDI: 0000000000000001 > [ 3510.401086] RBP: 0000000002327408 R08: 00007f3a9be53780 R09: 00007f3a9c8a4700 > [ 3510.401086] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 > [ 3510.401087] R13: 0000000000000001 R14: 00007f3a9be52620 R15: 0000000000000001 > [ 3510.401090] > [ 3510.401093] Allocated by task 76795: > [ 3510.401098] kasan_kmalloc+0xa6/0xd0 > [ 3510.401099] __kmalloc+0xfb/0x200 > [ 3510.401104] iavf_init_interrupt_scheme+0x26f/0x1310 [iavf] > [ 3510.401108] iavf_watchdog_task+0x1d58/0x4050 [iavf] > [ 3510.401114] process_one_work+0x56a/0x11f0 > [ 3510.401115] worker_thread+0x8f/0xf40 > [ 3510.401117] kthread+0x2a0/0x390 > [ 3510.401119] ret_from_fork+0x1f/0x40 > [ 3510.401122] 0xffffffffffffffff > [ 3510.401123] > > If we detected removing is in processing, we can avoid unnecessary > waiting and return error faster. > > On the other hand in timeout handling, we should keep the original > num_active_queues and reset num_req_queues to 0. 
> > Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") > Signed-off-by: Ding Hui <dinghui@sangfor.com.cn> > Cc: Donglin Peng <pengdonglin@sangfor.com.cn> > Cc: Huang Cun <huangcun@sangfor.com.cn> > Reviewed-by: Simon Horman <simon.horman@corigine.com> > Reviewed-by: Michal Kubiak <michal.kubiak@intel.com> > --- > v3 to v4: > - nothing changed > > v2 to v3: > - fix review tag > > v1 to v2: > - add reproduction script > > --- > drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > index 6f171d1d85b7..d8a3c0cfedd0 100644 > --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c > @@ -1857,13 +1857,15 @@ static int iavf_set_channels(struct net_device *netdev, > /* wait for the reset is done */ > for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { > msleep(IAVF_RESET_WAIT_MS); > + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) > + return -EOPNOTSUPP; > if (adapter->flags & IAVF_FLAG_RESET_PENDING) > continue; > break; > } > if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { > adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; > - adapter->num_active_queues = num_req; > + adapter->num_req_queues = 0; > return -EOPNOTSUPP; > } > Thanks. Reviewed-by: Ahmed Zaki <ahmed.zaki@intel.com> ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2023-05-08 12:35 UTC | newest] Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-05-03 3:15 [PATCH net v4 0/2] iavf: Fix issues when setting channels concurrency with removing Ding Hui 2023-05-03 3:15 ` [PATCH net v4 1/2] iavf: Fix use-after-free in free_netdev Ding Hui 2023-05-03 3:15 ` [PATCH net v4 2/2] iavf: Fix out-of-bounds when setting channels on remove Ding Hui 2023-05-03 8:24 ` Leon Romanovsky 2023-05-03 14:00 ` Ding Hui 2023-05-03 16:29 ` Leon Romanovsky 2023-05-03 19:22 ` Chittim, Madhu 2023-05-04 7:57 ` Leon Romanovsky 2023-05-08 12:34 ` Ding Hui 2023-05-04 2:59 ` Jakub Kicinski 2023-05-03 19:26 ` [Intel-wired-lan] " Ahmed Zaki
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).