diff for duplicates of <20250515175251.58b5123f@kernel.org> diff --git a/a/1.txt b/N1/1.txt index 3bff9b6..add4204 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,17 +1,54 @@ -On Thu, 15 May 2025 11:17:42 +0800 Jinjian Song wrote: -> diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c -> index 91fa082e9cab..2116ff81728b 100644 -> --- a/drivers/net/wwan/t7xx/t7xx_netdev.c -> +++ b/drivers/net/wwan/t7xx/t7xx_netdev.c -> @@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l -> if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni)) -> return; -> -> + ctlb->ccmni_inst[if_id] = NULL; -> unregister_netdevice(dev); +>On Thu, 15 May 2025 11:17:42 +0800 Jinjian Song wrote: +>> diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c +>> index 91fa082e9cab..2116ff81728b 100644 +>> --- a/drivers/net/wwan/t7xx/t7xx_netdev.c +>> +++ b/drivers/net/wwan/t7xx/t7xx_netdev.c +>> @@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l +>> if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni)) +>> return; +>> +>> + ctlb->ccmni_inst[if_id] = NULL; +>> unregister_netdevice(dev); +> +>I don't see any synchronization between this write and NAPI processing. +>Is this safe? NAPI can be at any point of processing as we set the ptr +>to NULL -I don't see any synchronization between this write and NAPI processing. -Is this safe? NAPI can be at any point of processing as we set the ptr -to NULL --- -pw-bot: cr +This panic occured in the scenario where there are frequent disconnect +and connect WWAN cellular on UI. +I debug the panic with gdb and found it as caused by an invalid net_device +during this process: +1.-> t7xx_dpmaif_napi_rx_poll +2.-> t7xx_ccmni_recv_skb +3.-> napi_gro_receive +4.-> dev_gro_receive +5.-> netif_elide_gro +One way, the net_device using in step 5 is valid, so "dev->features .." panic, +this net_device pass from t7xx_ccmni_recv_skb: +void t7xx_ccmni_recv_skb(...) { + [...] + + ccmni = ccmni_ctlb->ccmni_inst[netif_id]; + if (!ccmni) { + dev_kfree_skb(skb); + return; + } + + net_dev = ccmni->dev; + skb->dev = net_dev; + [...] + napi_gro_receive(napi, skb); + [...] +} + +Another way, WWAN disconnect -> wwan_ops.dellink -> t7xx_ccmni_wwan_dellink +-> unregister_netdevice(dev). +netdevice has been invalid, so t7xx_dpmaif_napi_rx_poll can't use it any more. +I mark ccmni_inst[if_id] = NULL with netdevice invalid at the same time. +It seems that a judgment is made every time ccmni_inst[x] is used in the driver, +and the synchronization on the 2 way might have been done when NAPI triggers +polling by napi_schedule and when WWAN trigger dellink. +So this should be safe. + +Jinjian, +Best Regards. diff --git a/a/content_digest b/N1/content_digest index a826ab6..bf570e3 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -1,49 +1,87 @@ "ref\020250515031743.246178-1-jinjian.song@fibocom.com\0" - "From\0Jakub Kicinski <kuba@kernel.org>\0" + "From\0Jinjian Song <jinjian.song@fibocom.com>\0" "Subject\0Re: [net v1] net: wwan: t7xx: Fix napi rx poll issue\0" - "Date\0Thu, 15 May 2025 17:52:51 -0700\0" - "To\0Jinjian Song <jinjian.song@fibocom.com>\0" - "Cc\0chandrashekar.devegowda@intel.com" + "Date\0Fri, 16 May 2025 15:30:38 +0800\0" + "To\0kuba@kernel.org\0" + "Cc\0andrew+netdev@lunn.ch" + angelogioacchino.delregno@collabora.com + chandrashekar.devegowda@intel.com chiranjeevi.rapolu@linux.intel.com - haijun.liu@mediatek.com - m.chetan.kumar@linux.intel.com - ricardo.martinez@linux.intel.com - loic.poulain@linaro.org - ryazanov.s.a@gmail.com - johannes@sipsolutions.net + corbet@lwn.net + danielwinkler@google.com davem@davemloft.net edumazet@google.com - pabeni@redhat.com - linux-kernel@vger.kernel.org - netdev@vger.kernel.org - linux-doc@vger.kernel.org - angelogioacchino.delregno@collabora.com + haijun.liu@mediatek.com + helgaas@kernel.org + horms@kernel.org + jinjian.song@fibocom.com + johannes@sipsolutions.net linux-arm-kernel@lists.infradead.org - matthias.bgg@gmail.com - corbet@lwn.net + linux-doc@vger.kernel.org + linux-kernel@vger.kernel.org linux-mediatek@lists.infradead.org - helgaas@kernel.org - danielwinkler@google.com - andrew+netdev@lunn.ch - " horms@kernel.org\0" + loic.poulain@linaro.org + m.chetan.kumar@linux.intel.com + matthias.bgg@gmail.com + netdev@vger.kernel.org + pabeni@redhat.com + ricardo.martinez@linux.intel.com + " ryazanov.s.a@gmail.com\0" "\00:1\0" "b\0" - "On Thu, 15 May 2025 11:17:42 +0800 Jinjian Song wrote:\n" - "> diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c\n" - "> index 91fa082e9cab..2116ff81728b 100644\n" - "> --- a/drivers/net/wwan/t7xx/t7xx_netdev.c\n" - "> +++ b/drivers/net/wwan/t7xx/t7xx_netdev.c\n" - "> @@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l\n" - "> \tif (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni))\n" - "> \t\treturn;\n" - "> \n" - "> +\tctlb->ccmni_inst[if_id] = NULL;\n" - "> \tunregister_netdevice(dev);\n" + ">On Thu, 15 May 2025 11:17:42 +0800 Jinjian Song wrote:\n" + ">> diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c\n" + ">> index 91fa082e9cab..2116ff81728b 100644\n" + ">> --- a/drivers/net/wwan/t7xx/t7xx_netdev.c\n" + ">> +++ b/drivers/net/wwan/t7xx/t7xx_netdev.c\n" + ">> @@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l\n" + ">> \tif (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni))\n" + ">> \t\treturn;\n" + ">> \n" + ">> +\tctlb->ccmni_inst[if_id] = NULL;\n" + ">> \tunregister_netdevice(dev);\n" + ">\n" + ">I don't see any synchronization between this write and NAPI processing.\n" + ">Is this safe? NAPI can be at any point of processing as we set the ptr\n" + ">to NULL\n" + "\n" + "This panic occured in the scenario where there are frequent disconnect\n" + "and connect WWAN cellular on UI.\n" + "I debug the panic with gdb and found it as caused by an invalid net_device\n" + "during this process:\n" + "1.-> t7xx_dpmaif_napi_rx_poll\n" + "2.-> t7xx_ccmni_recv_skb \n" + "3.-> napi_gro_receive\n" + "4.-> dev_gro_receive\n" + "5.-> netif_elide_gro\n" + "One way, the net_device using in step 5 is valid, so \"dev->features ..\" panic,\n" + "this net_device pass from t7xx_ccmni_recv_skb:\n" + "void t7xx_ccmni_recv_skb(...) {\n" + " [...]\n" + " \n" + " ccmni = ccmni_ctlb->ccmni_inst[netif_id];\n" + " if (!ccmni) {\n" + " dev_kfree_skb(skb);\n" + " return;\n" + " }\n" + " \n" + " net_dev = ccmni->dev;\n" + " skb->dev = net_dev;\n" + " [...]\n" + " napi_gro_receive(napi, skb);\n" + " [...]\n" + "}\n" + "\n" + "Another way, WWAN disconnect -> wwan_ops.dellink -> t7xx_ccmni_wwan_dellink\n" + "-> unregister_netdevice(dev).\n" + "netdevice has been invalid, so t7xx_dpmaif_napi_rx_poll can't use it any more.\n" + "I mark ccmni_inst[if_id] = NULL with netdevice invalid at the same time.\n" + "It seems that a judgment is made every time ccmni_inst[x] is used in the driver,\n" + "and the synchronization on the 2 way might have been done when NAPI triggers\n" + "polling by napi_schedule and when WWAN trigger dellink. \n" + "So this should be safe.\n" "\n" - "I don't see any synchronization between this write and NAPI processing.\n" - "Is this safe? NAPI can be at any point of processing as we set the ptr\n" - "to NULL\n" - "-- \n" - pw-bot: cr + "Jinjian,\n" + Best Regards. -627033173a9b5f5705ba5b20789d6372046f1fc0c6317c3062bd5a745028122d +e1a16bda59cb455eed3433b010b6edfdb06b7819778eecf90f3c14e0ff0e0a0c
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox